• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82 
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85 
86 /************************************************************************
87  *									*
88  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
89  *									*
90  ************************************************************************/
91 
92 #define XML_PARSER_BIG_ENTITY 1000
93 #define XML_PARSER_LOT_ENTITY 5000
94 
95 /*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
100  */
101 #define XML_PARSER_NON_LINEAR 10
102 
103 /*
104  * xmlParserEntityCheck
105  *
106  * Function to check non-linear entity expansion behaviour
107  * This is here to detect and stop exponential linear entity expansion
108  * This is not a limitation of the parser but a safety
109  * boundary feature. It can be disabled with the XML_PARSE_HUGE
110  * parser option.
111  */
112 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)113 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114                      xmlEntityPtr ent)
115 {
116     unsigned long consumed = 0;
117 
118     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119         return (0);
120     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121         return (1);
122     if (size != 0) {
123         /*
124          * Do the check based on the replacement size of the entity
125          */
126         if (size < XML_PARSER_BIG_ENTITY)
127 	    return(0);
128 
129         /*
130          * A limit on the amount of text data reasonably used
131          */
132         if (ctxt->input != NULL) {
133             consumed = ctxt->input->consumed +
134                 (ctxt->input->cur - ctxt->input->base);
135         }
136         consumed += ctxt->sizeentities;
137 
138         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140             return (0);
141     } else if (ent != NULL) {
142         /*
143          * use the number of parsed entities in the replacement
144          */
145         size = ent->checked;
146 
147         /*
148          * The amount of data parsed counting entities size only once
149          */
150         if (ctxt->input != NULL) {
151             consumed = ctxt->input->consumed +
152                 (ctxt->input->cur - ctxt->input->base);
153         }
154         consumed += ctxt->sizeentities;
155 
156         /*
157          * Check the density of entities for the amount of data
158 	 * knowing an entity reference will take at least 3 bytes
159          */
160         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161             return (0);
162     } else {
163         /*
164          * strange we got no data for checking just return
165          */
166         return (0);
167     }
168 
169     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170     return (1);
171 }
172 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents accepted for
 * processing. It is a safety boundary rather than a parser limitation,
 * and it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
182 
183 
184 
185 #define SAX2 1
186 #define XML_PARSER_BIG_BUFFER_SIZE 300
187 #define XML_PARSER_BUFFER_SIZE 100
188 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
189 
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
198 
199 
200 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
201 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202                                        const xmlChar **str);
203 
204 static xmlParserErrors
205 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 	              xmlSAXHandlerPtr sax,
207 		      void *user_data, int depth, const xmlChar *URL,
208 		      const xmlChar *ID, xmlNodePtr *list);
209 
210 static int
211 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212                           const char *encoding);
213 #ifdef LIBXML_LEGACY_ENABLED
214 static void
215 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216                       xmlNodePtr lastNode);
217 #endif /* LIBXML_LEGACY_ENABLED */
218 
219 static xmlParserErrors
220 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
222 
223 static int
224 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225 
226 /************************************************************************
227  *									*
228  * 		Some factorized error routines				*
229  *									*
230  ************************************************************************/
231 
232 /**
233  * xmlErrAttributeDup:
234  * @ctxt:  an XML parser context
235  * @prefix:  the attribute prefix
236  * @localname:  the attribute localname
237  *
238  * Handle a redefinition of attribute error
239  */
240 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)241 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242                    const xmlChar * localname)
243 {
244     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245         (ctxt->instate == XML_PARSER_EOF))
246 	return;
247     if (ctxt != NULL)
248 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
249     if (prefix == NULL)
250         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
251                         ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252                         (const char *) localname, NULL, NULL, 0, 0,
253                         "Attribute %s redefined\n", localname);
254     else
255         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256                         ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257                         (const char *) prefix, (const char *) localname,
258                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259                         localname);
260     if (ctxt != NULL) {
261 	ctxt->wellFormed = 0;
262 	if (ctxt->recovery == 0)
263 	    ctxt->disableSAX = 1;
264     }
265 }
266 
267 /**
268  * xmlFatalErr:
269  * @ctxt:  an XML parser context
270  * @error:  the error number
271  * @extra:  extra information string
272  *
273  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274  */
275 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)276 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
277 {
278     const char *errmsg;
279 
280     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281         (ctxt->instate == XML_PARSER_EOF))
282 	return;
283     switch (error) {
284         case XML_ERR_INVALID_HEX_CHARREF:
285             errmsg = "CharRef: invalid hexadecimal value\n";
286             break;
287         case XML_ERR_INVALID_DEC_CHARREF:
288             errmsg = "CharRef: invalid decimal value\n";
289             break;
290         case XML_ERR_INVALID_CHARREF:
291             errmsg = "CharRef: invalid value\n";
292             break;
293         case XML_ERR_INTERNAL_ERROR:
294             errmsg = "internal error";
295             break;
296         case XML_ERR_PEREF_AT_EOF:
297             errmsg = "PEReference at end of document\n";
298             break;
299         case XML_ERR_PEREF_IN_PROLOG:
300             errmsg = "PEReference in prolog\n";
301             break;
302         case XML_ERR_PEREF_IN_EPILOG:
303             errmsg = "PEReference in epilog\n";
304             break;
305         case XML_ERR_PEREF_NO_NAME:
306             errmsg = "PEReference: no name\n";
307             break;
308         case XML_ERR_PEREF_SEMICOL_MISSING:
309             errmsg = "PEReference: expecting ';'\n";
310             break;
311         case XML_ERR_ENTITY_LOOP:
312             errmsg = "Detected an entity reference loop\n";
313             break;
314         case XML_ERR_ENTITY_NOT_STARTED:
315             errmsg = "EntityValue: \" or ' expected\n";
316             break;
317         case XML_ERR_ENTITY_PE_INTERNAL:
318             errmsg = "PEReferences forbidden in internal subset\n";
319             break;
320         case XML_ERR_ENTITY_NOT_FINISHED:
321             errmsg = "EntityValue: \" or ' expected\n";
322             break;
323         case XML_ERR_ATTRIBUTE_NOT_STARTED:
324             errmsg = "AttValue: \" or ' expected\n";
325             break;
326         case XML_ERR_LT_IN_ATTRIBUTE:
327             errmsg = "Unescaped '<' not allowed in attributes values\n";
328             break;
329         case XML_ERR_LITERAL_NOT_STARTED:
330             errmsg = "SystemLiteral \" or ' expected\n";
331             break;
332         case XML_ERR_LITERAL_NOT_FINISHED:
333             errmsg = "Unfinished System or Public ID \" or ' expected\n";
334             break;
335         case XML_ERR_MISPLACED_CDATA_END:
336             errmsg = "Sequence ']]>' not allowed in content\n";
337             break;
338         case XML_ERR_URI_REQUIRED:
339             errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340             break;
341         case XML_ERR_PUBID_REQUIRED:
342             errmsg = "PUBLIC, the Public Identifier is missing\n";
343             break;
344         case XML_ERR_HYPHEN_IN_COMMENT:
345             errmsg = "Comment must not contain '--' (double-hyphen)\n";
346             break;
347         case XML_ERR_PI_NOT_STARTED:
348             errmsg = "xmlParsePI : no target name\n";
349             break;
350         case XML_ERR_RESERVED_XML_NAME:
351             errmsg = "Invalid PI name\n";
352             break;
353         case XML_ERR_NOTATION_NOT_STARTED:
354             errmsg = "NOTATION: Name expected here\n";
355             break;
356         case XML_ERR_NOTATION_NOT_FINISHED:
357             errmsg = "'>' required to close NOTATION declaration\n";
358             break;
359         case XML_ERR_VALUE_REQUIRED:
360             errmsg = "Entity value required\n";
361             break;
362         case XML_ERR_URI_FRAGMENT:
363             errmsg = "Fragment not allowed";
364             break;
365         case XML_ERR_ATTLIST_NOT_STARTED:
366             errmsg = "'(' required to start ATTLIST enumeration\n";
367             break;
368         case XML_ERR_NMTOKEN_REQUIRED:
369             errmsg = "NmToken expected in ATTLIST enumeration\n";
370             break;
371         case XML_ERR_ATTLIST_NOT_FINISHED:
372             errmsg = "')' required to finish ATTLIST enumeration\n";
373             break;
374         case XML_ERR_MIXED_NOT_STARTED:
375             errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376             break;
377         case XML_ERR_PCDATA_REQUIRED:
378             errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379             break;
380         case XML_ERR_ELEMCONTENT_NOT_STARTED:
381             errmsg = "ContentDecl : Name or '(' expected\n";
382             break;
383         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
384             errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385             break;
386         case XML_ERR_PEREF_IN_INT_SUBSET:
387             errmsg =
388                 "PEReference: forbidden within markup decl in internal subset\n";
389             break;
390         case XML_ERR_GT_REQUIRED:
391             errmsg = "expected '>'\n";
392             break;
393         case XML_ERR_CONDSEC_INVALID:
394             errmsg = "XML conditional section '[' expected\n";
395             break;
396         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
397             errmsg = "Content error in the external subset\n";
398             break;
399         case XML_ERR_CONDSEC_INVALID_KEYWORD:
400             errmsg =
401                 "conditional section INCLUDE or IGNORE keyword expected\n";
402             break;
403         case XML_ERR_CONDSEC_NOT_FINISHED:
404             errmsg = "XML conditional section not closed\n";
405             break;
406         case XML_ERR_XMLDECL_NOT_STARTED:
407             errmsg = "Text declaration '<?xml' required\n";
408             break;
409         case XML_ERR_XMLDECL_NOT_FINISHED:
410             errmsg = "parsing XML declaration: '?>' expected\n";
411             break;
412         case XML_ERR_EXT_ENTITY_STANDALONE:
413             errmsg = "external parsed entities cannot be standalone\n";
414             break;
415         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
416             errmsg = "EntityRef: expecting ';'\n";
417             break;
418         case XML_ERR_DOCTYPE_NOT_FINISHED:
419             errmsg = "DOCTYPE improperly terminated\n";
420             break;
421         case XML_ERR_LTSLASH_REQUIRED:
422             errmsg = "EndTag: '</' not found\n";
423             break;
424         case XML_ERR_EQUAL_REQUIRED:
425             errmsg = "expected '='\n";
426             break;
427         case XML_ERR_STRING_NOT_CLOSED:
428             errmsg = "String not closed expecting \" or '\n";
429             break;
430         case XML_ERR_STRING_NOT_STARTED:
431             errmsg = "String not started expecting ' or \"\n";
432             break;
433         case XML_ERR_ENCODING_NAME:
434             errmsg = "Invalid XML encoding name\n";
435             break;
436         case XML_ERR_STANDALONE_VALUE:
437             errmsg = "standalone accepts only 'yes' or 'no'\n";
438             break;
439         case XML_ERR_DOCUMENT_EMPTY:
440             errmsg = "Document is empty\n";
441             break;
442         case XML_ERR_DOCUMENT_END:
443             errmsg = "Extra content at the end of the document\n";
444             break;
445         case XML_ERR_NOT_WELL_BALANCED:
446             errmsg = "chunk is not well balanced\n";
447             break;
448         case XML_ERR_EXTRA_CONTENT:
449             errmsg = "extra content at the end of well balanced chunk\n";
450             break;
451         case XML_ERR_VERSION_MISSING:
452             errmsg = "Malformed declaration expecting version\n";
453             break;
454 #if 0
455         case:
456             errmsg = "\n";
457             break;
458 #endif
459         default:
460             errmsg = "Unregistered error message\n";
461     }
462     if (ctxt != NULL)
463 	ctxt->errNo = error;
464     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
465                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466                     info);
467     if (ctxt != NULL) {
468 	ctxt->wellFormed = 0;
469 	if (ctxt->recovery == 0)
470 	    ctxt->disableSAX = 1;
471     }
472 }
473 
474 /**
475  * xmlFatalErrMsg:
476  * @ctxt:  an XML parser context
477  * @error:  the error number
478  * @msg:  the error message
479  *
480  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481  */
482 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)483 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484                const char *msg)
485 {
486     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487         (ctxt->instate == XML_PARSER_EOF))
488 	return;
489     if (ctxt != NULL)
490 	ctxt->errNo = error;
491     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
492                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
493     if (ctxt != NULL) {
494 	ctxt->wellFormed = 0;
495 	if (ctxt->recovery == 0)
496 	    ctxt->disableSAX = 1;
497     }
498 }
499 
500 /**
501  * xmlWarningMsg:
502  * @ctxt:  an XML parser context
503  * @error:  the error number
504  * @msg:  the error message
505  * @str1:  extra data
506  * @str2:  extra data
507  *
508  * Handle a warning.
509  */
510 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)511 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512               const char *msg, const xmlChar *str1, const xmlChar *str2)
513 {
514     xmlStructuredErrorFunc schannel = NULL;
515 
516     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517         (ctxt->instate == XML_PARSER_EOF))
518 	return;
519     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520         (ctxt->sax->initialized == XML_SAX2_MAGIC))
521         schannel = ctxt->sax->serror;
522     __xmlRaiseError(schannel,
523                     (ctxt->sax) ? ctxt->sax->warning : NULL,
524                     ctxt->userData,
525                     ctxt, NULL, XML_FROM_PARSER, error,
526                     XML_ERR_WARNING, NULL, 0,
527 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
528 		    msg, (const char *) str1, (const char *) str2);
529 }
530 
531 /**
532  * xmlValidityError:
533  * @ctxt:  an XML parser context
534  * @error:  the error number
535  * @msg:  the error message
536  * @str1:  extra data
537  *
538  * Handle a validity error.
539  */
540 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)541 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
542               const char *msg, const xmlChar *str1, const xmlChar *str2)
543 {
544     xmlStructuredErrorFunc schannel = NULL;
545 
546     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547         (ctxt->instate == XML_PARSER_EOF))
548 	return;
549     if (ctxt != NULL) {
550 	ctxt->errNo = error;
551 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 	    schannel = ctxt->sax->serror;
553     }
554     __xmlRaiseError(schannel,
555                     ctxt->vctxt.error, ctxt->vctxt.userData,
556                     ctxt, NULL, XML_FROM_DTD, error,
557                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
558 		    (const char *) str2, NULL, 0, 0,
559 		    msg, (const char *) str1, (const char *) str2);
560     if (ctxt != NULL) {
561 	ctxt->valid = 0;
562     }
563 }
564 
565 /**
566  * xmlFatalErrMsgInt:
567  * @ctxt:  an XML parser context
568  * @error:  the error number
569  * @msg:  the error message
570  * @val:  an integer value
571  *
572  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573  */
574 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)575 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
576                   const char *msg, int val)
577 {
578     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579         (ctxt->instate == XML_PARSER_EOF))
580 	return;
581     if (ctxt != NULL)
582 	ctxt->errNo = error;
583     __xmlRaiseError(NULL, NULL, NULL,
584                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
586     if (ctxt != NULL) {
587 	ctxt->wellFormed = 0;
588 	if (ctxt->recovery == 0)
589 	    ctxt->disableSAX = 1;
590     }
591 }
592 
593 /**
594  * xmlFatalErrMsgStrIntStr:
595  * @ctxt:  an XML parser context
596  * @error:  the error number
597  * @msg:  the error message
598  * @str1:  an string info
599  * @val:  an integer value
600  * @str2:  an string info
601  *
602  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603  */
604 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)605 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606                   const char *msg, const xmlChar *str1, int val,
607 		  const xmlChar *str2)
608 {
609     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610         (ctxt->instate == XML_PARSER_EOF))
611 	return;
612     if (ctxt != NULL)
613 	ctxt->errNo = error;
614     __xmlRaiseError(NULL, NULL, NULL,
615                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616                     NULL, 0, (const char *) str1, (const char *) str2,
617 		    NULL, val, 0, msg, str1, val, str2);
618     if (ctxt != NULL) {
619 	ctxt->wellFormed = 0;
620 	if (ctxt->recovery == 0)
621 	    ctxt->disableSAX = 1;
622     }
623 }
624 
625 /**
626  * xmlFatalErrMsgStr:
627  * @ctxt:  an XML parser context
628  * @error:  the error number
629  * @msg:  the error message
630  * @val:  a string value
631  *
632  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633  */
634 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)635 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636                   const char *msg, const xmlChar * val)
637 {
638     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639         (ctxt->instate == XML_PARSER_EOF))
640 	return;
641     if (ctxt != NULL)
642 	ctxt->errNo = error;
643     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
644                     XML_FROM_PARSER, error, XML_ERR_FATAL,
645                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646                     val);
647     if (ctxt != NULL) {
648 	ctxt->wellFormed = 0;
649 	if (ctxt->recovery == 0)
650 	    ctxt->disableSAX = 1;
651     }
652 }
653 
654 /**
655  * xmlErrMsgStr:
656  * @ctxt:  an XML parser context
657  * @error:  the error number
658  * @msg:  the error message
659  * @val:  a string value
660  *
661  * Handle a non fatal parser error
662  */
663 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)664 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665                   const char *msg, const xmlChar * val)
666 {
667     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668         (ctxt->instate == XML_PARSER_EOF))
669 	return;
670     if (ctxt != NULL)
671 	ctxt->errNo = error;
672     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
673                     XML_FROM_PARSER, error, XML_ERR_ERROR,
674                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675                     val);
676 }
677 
678 /**
679  * xmlNsErr:
680  * @ctxt:  an XML parser context
681  * @error:  the error number
682  * @msg:  the message
683  * @info1:  extra information string
684  * @info2:  extra information string
685  *
686  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687  */
688 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)689 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690          const char *msg,
691          const xmlChar * info1, const xmlChar * info2,
692          const xmlChar * info3)
693 {
694     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695         (ctxt->instate == XML_PARSER_EOF))
696 	return;
697     if (ctxt != NULL)
698 	ctxt->errNo = error;
699     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
700                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
701                     (const char *) info2, (const char *) info3, 0, 0, msg,
702                     info1, info2, info3);
703     if (ctxt != NULL)
704 	ctxt->nsWellFormed = 0;
705 }
706 
707 /**
708  * xmlNsWarn
709  * @ctxt:  an XML parser context
710  * @error:  the error number
711  * @msg:  the message
712  * @info1:  extra information string
713  * @info2:  extra information string
714  *
715  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716  */
717 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)718 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719          const char *msg,
720          const xmlChar * info1, const xmlChar * info2,
721          const xmlChar * info3)
722 {
723     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724         (ctxt->instate == XML_PARSER_EOF))
725 	return;
726     if (ctxt != NULL)
727 	ctxt->errNo = error;
728     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
730                     (const char *) info2, (const char *) info3, 0, 0, msg,
731                     info1, info2, info3);
732 }
733 
734 /************************************************************************
735  *									*
736  * 		Library wide options					*
737  *									*
738  ************************************************************************/
739 
740 /**
741   * xmlHasFeature:
742   * @feature: the feature to be examined
743   *
744   * Examines if the library has been compiled with a given feature.
745   *
746   * Returns a non-zero value if the feature exist, otherwise zero.
747   * Returns zero (0) if the feature does not exist or an unknown
748   * unknown feature is requested, non-zero otherwise.
749   */
750 int
xmlHasFeature(xmlFeature feature)751 xmlHasFeature(xmlFeature feature)
752 {
753     switch (feature) {
754 	case XML_WITH_THREAD:
755 #ifdef LIBXML_THREAD_ENABLED
756 	    return(1);
757 #else
758 	    return(0);
759 #endif
760         case XML_WITH_TREE:
761 #ifdef LIBXML_TREE_ENABLED
762             return(1);
763 #else
764             return(0);
765 #endif
766         case XML_WITH_OUTPUT:
767 #ifdef LIBXML_OUTPUT_ENABLED
768             return(1);
769 #else
770             return(0);
771 #endif
772         case XML_WITH_PUSH:
773 #ifdef LIBXML_PUSH_ENABLED
774             return(1);
775 #else
776             return(0);
777 #endif
778         case XML_WITH_READER:
779 #ifdef LIBXML_READER_ENABLED
780             return(1);
781 #else
782             return(0);
783 #endif
784         case XML_WITH_PATTERN:
785 #ifdef LIBXML_PATTERN_ENABLED
786             return(1);
787 #else
788             return(0);
789 #endif
790         case XML_WITH_WRITER:
791 #ifdef LIBXML_WRITER_ENABLED
792             return(1);
793 #else
794             return(0);
795 #endif
796         case XML_WITH_SAX1:
797 #ifdef LIBXML_SAX1_ENABLED
798             return(1);
799 #else
800             return(0);
801 #endif
802         case XML_WITH_FTP:
803 #ifdef LIBXML_FTP_ENABLED
804             return(1);
805 #else
806             return(0);
807 #endif
808         case XML_WITH_HTTP:
809 #ifdef LIBXML_HTTP_ENABLED
810             return(1);
811 #else
812             return(0);
813 #endif
814         case XML_WITH_VALID:
815 #ifdef LIBXML_VALID_ENABLED
816             return(1);
817 #else
818             return(0);
819 #endif
820         case XML_WITH_HTML:
821 #ifdef LIBXML_HTML_ENABLED
822             return(1);
823 #else
824             return(0);
825 #endif
826         case XML_WITH_LEGACY:
827 #ifdef LIBXML_LEGACY_ENABLED
828             return(1);
829 #else
830             return(0);
831 #endif
832         case XML_WITH_C14N:
833 #ifdef LIBXML_C14N_ENABLED
834             return(1);
835 #else
836             return(0);
837 #endif
838         case XML_WITH_CATALOG:
839 #ifdef LIBXML_CATALOG_ENABLED
840             return(1);
841 #else
842             return(0);
843 #endif
844         case XML_WITH_XPATH:
845 #ifdef LIBXML_XPATH_ENABLED
846             return(1);
847 #else
848             return(0);
849 #endif
850         case XML_WITH_XPTR:
851 #ifdef LIBXML_XPTR_ENABLED
852             return(1);
853 #else
854             return(0);
855 #endif
856         case XML_WITH_XINCLUDE:
857 #ifdef LIBXML_XINCLUDE_ENABLED
858             return(1);
859 #else
860             return(0);
861 #endif
862         case XML_WITH_ICONV:
863 #ifdef LIBXML_ICONV_ENABLED
864             return(1);
865 #else
866             return(0);
867 #endif
868         case XML_WITH_ISO8859X:
869 #ifdef LIBXML_ISO8859X_ENABLED
870             return(1);
871 #else
872             return(0);
873 #endif
874         case XML_WITH_UNICODE:
875 #ifdef LIBXML_UNICODE_ENABLED
876             return(1);
877 #else
878             return(0);
879 #endif
880         case XML_WITH_REGEXP:
881 #ifdef LIBXML_REGEXP_ENABLED
882             return(1);
883 #else
884             return(0);
885 #endif
886         case XML_WITH_AUTOMATA:
887 #ifdef LIBXML_AUTOMATA_ENABLED
888             return(1);
889 #else
890             return(0);
891 #endif
892         case XML_WITH_EXPR:
893 #ifdef LIBXML_EXPR_ENABLED
894             return(1);
895 #else
896             return(0);
897 #endif
898         case XML_WITH_SCHEMAS:
899 #ifdef LIBXML_SCHEMAS_ENABLED
900             return(1);
901 #else
902             return(0);
903 #endif
904         case XML_WITH_SCHEMATRON:
905 #ifdef LIBXML_SCHEMATRON_ENABLED
906             return(1);
907 #else
908             return(0);
909 #endif
910         case XML_WITH_MODULES:
911 #ifdef LIBXML_MODULES_ENABLED
912             return(1);
913 #else
914             return(0);
915 #endif
916         case XML_WITH_DEBUG:
917 #ifdef LIBXML_DEBUG_ENABLED
918             return(1);
919 #else
920             return(0);
921 #endif
922         case XML_WITH_DEBUG_MEM:
923 #ifdef DEBUG_MEMORY_LOCATION
924             return(1);
925 #else
926             return(0);
927 #endif
928         case XML_WITH_DEBUG_RUN:
929 #ifdef LIBXML_DEBUG_RUNTIME
930             return(1);
931 #else
932             return(0);
933 #endif
934         case XML_WITH_ZLIB:
935 #ifdef LIBXML_ZLIB_ENABLED
936             return(1);
937 #else
938             return(0);
939 #endif
940         case XML_WITH_ICU:
941 #ifdef LIBXML_ICU_ENABLED
942             return(1);
943 #else
944             return(0);
945 #endif
946         default:
947 	    break;
948      }
949      return(0);
950 }
951 
952 /************************************************************************
953  *									*
954  * 		SAX2 defaulted attributes handling			*
955  *									*
956  ************************************************************************/
957 
958 /**
959  * xmlDetectSAX2:
960  * @ctxt:  an XML parser context
961  *
962  * Do the SAX2 detection and specific intialization
963  */
964 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)965 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
966     if (ctxt == NULL) return;
967 #ifdef LIBXML_SAX1_ENABLED
968     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
969         ((ctxt->sax->startElementNs != NULL) ||
970          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
971 #else
972     ctxt->sax2 = 1;
973 #endif /* LIBXML_SAX1_ENABLED */
974 
975     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
976     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
977     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
978     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
979     		(ctxt->str_xml_ns == NULL)) {
980         xmlErrMemory(ctxt, NULL);
981     }
982 }
983 
984 typedef struct _xmlDefAttrs xmlDefAttrs;
985 typedef xmlDefAttrs *xmlDefAttrsPtr;
986 struct _xmlDefAttrs {
987     int nbAttrs;	/* number of defaulted attributes on that element */
988     int maxAttrs;       /* the size of the array */
989     const xmlChar *values[5]; /* array of localname/prefix/values/external */
990 };
991 
992 /**
993  * xmlAttrNormalizeSpace:
994  * @src: the source string
995  * @dst: the target string
996  *
997  * Normalize the space in non CDATA attribute values:
998  * If the attribute type is not CDATA, then the XML processor MUST further
999  * process the normalized attribute value by discarding any leading and
1000  * trailing space (#x20) characters, and by replacing sequences of space
1001  * (#x20) characters by a single space (#x20) character.
1002  * Note that the size of dst need to be at least src, and if one doesn't need
1003  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1004  * passing src as dst is just fine.
1005  *
1006  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1007  *         is needed.
1008  */
1009 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1010 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1011 {
1012     if ((src == NULL) || (dst == NULL))
1013         return(NULL);
1014 
1015     while (*src == 0x20) src++;
1016     while (*src != 0) {
1017 	if (*src == 0x20) {
1018 	    while (*src == 0x20) src++;
1019 	    if (*src != 0)
1020 		*dst++ = 0x20;
1021 	} else {
1022 	    *dst++ = *src++;
1023 	}
1024     }
1025     *dst = 0;
1026     if (dst == src)
1027        return(NULL);
1028     return(dst);
1029 }
1030 
1031 /**
1032  * xmlAttrNormalizeSpace2:
1033  * @src: the source string
1034  *
1035  * Normalize the space in non CDATA attribute values, a slightly more complex
1036  * front end to avoid allocation problems when running on attribute values
1037  * coming from the input.
1038  *
1039  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1040  *         is needed.
1041  */
1042 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1043 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1044 {
1045     int i;
1046     int remove_head = 0;
1047     int need_realloc = 0;
1048     const xmlChar *cur;
1049 
1050     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1051         return(NULL);
1052     i = *len;
1053     if (i <= 0)
1054         return(NULL);
1055 
1056     cur = src;
1057     while (*cur == 0x20) {
1058         cur++;
1059 	remove_head++;
1060     }
1061     while (*cur != 0) {
1062 	if (*cur == 0x20) {
1063 	    cur++;
1064 	    if ((*cur == 0x20) || (*cur == 0)) {
1065 	        need_realloc = 1;
1066 		break;
1067 	    }
1068 	} else
1069 	    cur++;
1070     }
1071     if (need_realloc) {
1072         xmlChar *ret;
1073 
1074 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1075 	if (ret == NULL) {
1076 	    xmlErrMemory(ctxt, NULL);
1077 	    return(NULL);
1078 	}
1079 	xmlAttrNormalizeSpace(ret, ret);
1080 	*len = (int) strlen((const char *)ret);
1081         return(ret);
1082     } else if (remove_head) {
1083         *len -= remove_head;
1084         memmove(src, src + remove_head, 1 + *len);
1085 	return(src);
1086     }
1087     return(NULL);
1088 }
1089 
1090 /**
1091  * xmlAddDefAttrs:
1092  * @ctxt:  an XML parser context
1093  * @fullname:  the element fullname
1094  * @fullattr:  the attribute fullname
1095  * @value:  the attribute value
1096  *
1097  * Add a defaulted attribute for an element
1098  */
1099 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1100 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1101                const xmlChar *fullname,
1102                const xmlChar *fullattr,
1103                const xmlChar *value) {
1104     xmlDefAttrsPtr defaults;
1105     int len;
1106     const xmlChar *name;
1107     const xmlChar *prefix;
1108 
1109     /*
1110      * Allows to detect attribute redefinitions
1111      */
1112     if (ctxt->attsSpecial != NULL) {
1113         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1114 	    return;
1115     }
1116 
1117     if (ctxt->attsDefault == NULL) {
1118         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1119 	if (ctxt->attsDefault == NULL)
1120 	    goto mem_error;
1121     }
1122 
1123     /*
1124      * split the element name into prefix:localname , the string found
1125      * are within the DTD and then not associated to namespace names.
1126      */
1127     name = xmlSplitQName3(fullname, &len);
1128     if (name == NULL) {
1129         name = xmlDictLookup(ctxt->dict, fullname, -1);
1130 	prefix = NULL;
1131     } else {
1132         name = xmlDictLookup(ctxt->dict, name, -1);
1133 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1134     }
1135 
1136     /*
1137      * make sure there is some storage
1138      */
1139     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1140     if (defaults == NULL) {
1141         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1142 	                   (4 * 5) * sizeof(const xmlChar *));
1143 	if (defaults == NULL)
1144 	    goto mem_error;
1145 	defaults->nbAttrs = 0;
1146 	defaults->maxAttrs = 4;
1147 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1148 	                        defaults, NULL) < 0) {
1149 	    xmlFree(defaults);
1150 	    goto mem_error;
1151 	}
1152     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1153         xmlDefAttrsPtr temp;
1154 
1155         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1156 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1157 	if (temp == NULL)
1158 	    goto mem_error;
1159 	defaults = temp;
1160 	defaults->maxAttrs *= 2;
1161 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1162 	                        defaults, NULL) < 0) {
1163 	    xmlFree(defaults);
1164 	    goto mem_error;
1165 	}
1166     }
1167 
1168     /*
1169      * Split the element name into prefix:localname , the string found
1170      * are within the DTD and hen not associated to namespace names.
1171      */
1172     name = xmlSplitQName3(fullattr, &len);
1173     if (name == NULL) {
1174         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1175 	prefix = NULL;
1176     } else {
1177         name = xmlDictLookup(ctxt->dict, name, -1);
1178 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1179     }
1180 
1181     defaults->values[5 * defaults->nbAttrs] = name;
1182     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1183     /* intern the string and precompute the end */
1184     len = xmlStrlen(value);
1185     value = xmlDictLookup(ctxt->dict, value, len);
1186     defaults->values[5 * defaults->nbAttrs + 2] = value;
1187     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1188     if (ctxt->external)
1189         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1190     else
1191         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1192     defaults->nbAttrs++;
1193 
1194     return;
1195 
1196 mem_error:
1197     xmlErrMemory(ctxt, NULL);
1198     return;
1199 }
1200 
1201 /**
1202  * xmlAddSpecialAttr:
1203  * @ctxt:  an XML parser context
1204  * @fullname:  the element fullname
1205  * @fullattr:  the attribute fullname
1206  * @type:  the attribute type
1207  *
1208  * Register this attribute type
1209  */
1210 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1211 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1212 		  const xmlChar *fullname,
1213 		  const xmlChar *fullattr,
1214 		  int type)
1215 {
1216     if (ctxt->attsSpecial == NULL) {
1217         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1218 	if (ctxt->attsSpecial == NULL)
1219 	    goto mem_error;
1220     }
1221 
1222     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1223         return;
1224 
1225     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1226                      (void *) (long) type);
1227     return;
1228 
1229 mem_error:
1230     xmlErrMemory(ctxt, NULL);
1231     return;
1232 }
1233 
1234 /**
1235  * xmlCleanSpecialAttrCallback:
1236  *
1237  * Removes CDATA attributes from the special attribute table
1238  */
1239 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1240 xmlCleanSpecialAttrCallback(void *payload, void *data,
1241                             const xmlChar *fullname, const xmlChar *fullattr,
1242                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1243     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1244 
1245     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1246         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1247     }
1248 }
1249 
1250 /**
1251  * xmlCleanSpecialAttr:
1252  * @ctxt:  an XML parser context
1253  *
1254  * Trim the list of attributes defined to remove all those of type
1255  * CDATA as they are not special. This call should be done when finishing
1256  * to parse the DTD and before starting to parse the document root.
1257  */
1258 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1259 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1260 {
1261     if (ctxt->attsSpecial == NULL)
1262         return;
1263 
1264     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1265 
1266     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1267         xmlHashFree(ctxt->attsSpecial, NULL);
1268         ctxt->attsSpecial = NULL;
1269     }
1270     return;
1271 }
1272 
1273 /**
1274  * xmlCheckLanguageID:
1275  * @lang:  pointer to the string value
1276  *
1277  * Checks that the value conforms to the LanguageID production:
1278  *
1279  * NOTE: this is somewhat deprecated, those productions were removed from
1280  *       the XML Second edition.
1281  *
1282  * [33] LanguageID ::= Langcode ('-' Subcode)*
1283  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1284  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1285  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1286  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1287  * [38] Subcode ::= ([a-z] | [A-Z])+
1288  *
1289  * Returns 1 if correct 0 otherwise
1290  **/
1291 int
xmlCheckLanguageID(const xmlChar * lang)1292 xmlCheckLanguageID(const xmlChar * lang)
1293 {
1294     const xmlChar *cur = lang;
1295 
1296     if (cur == NULL)
1297         return (0);
1298     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1299         ((cur[0] == 'I') && (cur[1] == '-'))) {
1300         /*
1301          * IANA code
1302          */
1303         cur += 2;
1304         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1305                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1306             cur++;
1307     } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1308                ((cur[0] == 'X') && (cur[1] == '-'))) {
1309         /*
1310          * User code
1311          */
1312         cur += 2;
1313         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1314                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1315             cur++;
1316     } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317                ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1318         /*
1319          * ISO639
1320          */
1321         cur++;
1322         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1323             ((cur[0] >= 'a') && (cur[0] <= 'z')))
1324             cur++;
1325         else
1326             return (0);
1327     } else
1328         return (0);
1329     while (cur[0] != 0) {       /* non input consuming */
1330         if (cur[0] != '-')
1331             return (0);
1332         cur++;
1333         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334             ((cur[0] >= 'a') && (cur[0] <= 'z')))
1335             cur++;
1336         else
1337             return (0);
1338         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1339                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1340             cur++;
1341     }
1342     return (1);
1343 }
1344 
1345 /************************************************************************
1346  *									*
1347  *		Parser stacks related functions and macros		*
1348  *									*
1349  ************************************************************************/
1350 
1351 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1352                                      const xmlChar ** str);
1353 
1354 #ifdef SAX2
1355 /**
1356  * nsPush:
1357  * @ctxt:  an XML parser context
1358  * @prefix:  the namespace prefix or NULL
1359  * @URL:  the namespace name
1360  *
1361  * Pushes a new parser namespace on top of the ns stack
1362  *
1363  * Returns -1 in case of error, -2 if the namespace should be discarded
1364  *	   and the index in the stack otherwise.
1365  */
1366 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1367 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1368 {
1369     if (ctxt->options & XML_PARSE_NSCLEAN) {
1370         int i;
1371 	for (i = 0;i < ctxt->nsNr;i += 2) {
1372 	    if (ctxt->nsTab[i] == prefix) {
1373 		/* in scope */
1374 	        if (ctxt->nsTab[i + 1] == URL)
1375 		    return(-2);
1376 		/* out of scope keep it */
1377 		break;
1378 	    }
1379 	}
1380     }
1381     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1382 	ctxt->nsMax = 10;
1383 	ctxt->nsNr = 0;
1384 	ctxt->nsTab = (const xmlChar **)
1385 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1386 	if (ctxt->nsTab == NULL) {
1387 	    xmlErrMemory(ctxt, NULL);
1388 	    ctxt->nsMax = 0;
1389             return (-1);
1390 	}
1391     } else if (ctxt->nsNr >= ctxt->nsMax) {
1392         const xmlChar ** tmp;
1393         ctxt->nsMax *= 2;
1394         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1395 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1396         if (tmp == NULL) {
1397             xmlErrMemory(ctxt, NULL);
1398 	    ctxt->nsMax /= 2;
1399             return (-1);
1400         }
1401 	ctxt->nsTab = tmp;
1402     }
1403     ctxt->nsTab[ctxt->nsNr++] = prefix;
1404     ctxt->nsTab[ctxt->nsNr++] = URL;
1405     return (ctxt->nsNr);
1406 }
1407 /**
1408  * nsPop:
1409  * @ctxt: an XML parser context
1410  * @nr:  the number to pop
1411  *
1412  * Pops the top @nr parser prefix/namespace from the ns stack
1413  *
1414  * Returns the number of namespaces removed
1415  */
1416 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1417 nsPop(xmlParserCtxtPtr ctxt, int nr)
1418 {
1419     int i;
1420 
1421     if (ctxt->nsTab == NULL) return(0);
1422     if (ctxt->nsNr < nr) {
1423         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1424         nr = ctxt->nsNr;
1425     }
1426     if (ctxt->nsNr <= 0)
1427         return (0);
1428 
1429     for (i = 0;i < nr;i++) {
1430          ctxt->nsNr--;
1431 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1432     }
1433     return(nr);
1434 }
1435 #endif
1436 
1437 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1438 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1439     const xmlChar **atts;
1440     int *attallocs;
1441     int maxatts;
1442 
1443     if (ctxt->atts == NULL) {
1444 	maxatts = 55; /* allow for 10 attrs by default */
1445 	atts = (const xmlChar **)
1446 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1447 	if (atts == NULL) goto mem_error;
1448 	ctxt->atts = atts;
1449 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1450 	if (attallocs == NULL) goto mem_error;
1451 	ctxt->attallocs = attallocs;
1452 	ctxt->maxatts = maxatts;
1453     } else if (nr + 5 > ctxt->maxatts) {
1454 	maxatts = (nr + 5) * 2;
1455 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1456 				     maxatts * sizeof(const xmlChar *));
1457 	if (atts == NULL) goto mem_error;
1458 	ctxt->atts = atts;
1459 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1460 	                             (maxatts / 5) * sizeof(int));
1461 	if (attallocs == NULL) goto mem_error;
1462 	ctxt->attallocs = attallocs;
1463 	ctxt->maxatts = maxatts;
1464     }
1465     return(ctxt->maxatts);
1466 mem_error:
1467     xmlErrMemory(ctxt, NULL);
1468     return(-1);
1469 }
1470 
1471 /**
1472  * inputPush:
1473  * @ctxt:  an XML parser context
1474  * @value:  the parser input
1475  *
1476  * Pushes a new parser input on top of the input stack
1477  *
1478  * Returns -1 in case of error, the index in the stack otherwise
1479  */
1480 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1481 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1482 {
1483     if ((ctxt == NULL) || (value == NULL))
1484         return(-1);
1485     if (ctxt->inputNr >= ctxt->inputMax) {
1486         ctxt->inputMax *= 2;
1487         ctxt->inputTab =
1488             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1489                                              ctxt->inputMax *
1490                                              sizeof(ctxt->inputTab[0]));
1491         if (ctxt->inputTab == NULL) {
1492             xmlErrMemory(ctxt, NULL);
1493 	    xmlFreeInputStream(value);
1494 	    ctxt->inputMax /= 2;
1495 	    value = NULL;
1496             return (-1);
1497         }
1498     }
1499     ctxt->inputTab[ctxt->inputNr] = value;
1500     ctxt->input = value;
1501     return (ctxt->inputNr++);
1502 }
1503 /**
1504  * inputPop:
1505  * @ctxt: an XML parser context
1506  *
1507  * Pops the top parser input from the input stack
1508  *
1509  * Returns the input just removed
1510  */
1511 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1512 inputPop(xmlParserCtxtPtr ctxt)
1513 {
1514     xmlParserInputPtr ret;
1515 
1516     if (ctxt == NULL)
1517         return(NULL);
1518     if (ctxt->inputNr <= 0)
1519         return (NULL);
1520     ctxt->inputNr--;
1521     if (ctxt->inputNr > 0)
1522         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1523     else
1524         ctxt->input = NULL;
1525     ret = ctxt->inputTab[ctxt->inputNr];
1526     ctxt->inputTab[ctxt->inputNr] = NULL;
1527     return (ret);
1528 }
1529 /**
1530  * nodePush:
1531  * @ctxt:  an XML parser context
1532  * @value:  the element node
1533  *
1534  * Pushes a new element node on top of the node stack
1535  *
1536  * Returns -1 in case of error, the index in the stack otherwise
1537  */
1538 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1539 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1540 {
1541     if (ctxt == NULL) return(0);
1542     if (ctxt->nodeNr >= ctxt->nodeMax) {
1543         xmlNodePtr *tmp;
1544 
1545 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1546                                       ctxt->nodeMax * 2 *
1547                                       sizeof(ctxt->nodeTab[0]));
1548         if (tmp == NULL) {
1549             xmlErrMemory(ctxt, NULL);
1550             return (-1);
1551         }
1552         ctxt->nodeTab = tmp;
1553 	ctxt->nodeMax *= 2;
1554     }
1555     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1556         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1557 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1558 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1559 			  xmlParserMaxDepth);
1560 	ctxt->instate = XML_PARSER_EOF;
1561 	return(-1);
1562     }
1563     ctxt->nodeTab[ctxt->nodeNr] = value;
1564     ctxt->node = value;
1565     return (ctxt->nodeNr++);
1566 }
1567 
1568 /**
1569  * nodePop:
1570  * @ctxt: an XML parser context
1571  *
1572  * Pops the top element node from the node stack
1573  *
1574  * Returns the node just removed
1575  */
1576 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1577 nodePop(xmlParserCtxtPtr ctxt)
1578 {
1579     xmlNodePtr ret;
1580 
1581     if (ctxt == NULL) return(NULL);
1582     if (ctxt->nodeNr <= 0)
1583         return (NULL);
1584     ctxt->nodeNr--;
1585     if (ctxt->nodeNr > 0)
1586         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1587     else
1588         ctxt->node = NULL;
1589     ret = ctxt->nodeTab[ctxt->nodeNr];
1590     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1591     return (ret);
1592 }
1593 
1594 #ifdef LIBXML_PUSH_ENABLED
1595 /**
1596  * nameNsPush:
1597  * @ctxt:  an XML parser context
1598  * @value:  the element name
1599  * @prefix:  the element prefix
1600  * @URI:  the element namespace name
1601  *
1602  * Pushes a new element name/prefix/URL on top of the name stack
1603  *
1604  * Returns -1 in case of error, the index in the stack otherwise
1605  */
1606 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1607 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1608            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1609 {
1610     if (ctxt->nameNr >= ctxt->nameMax) {
1611         const xmlChar * *tmp;
1612         void **tmp2;
1613         ctxt->nameMax *= 2;
1614         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1615                                     ctxt->nameMax *
1616                                     sizeof(ctxt->nameTab[0]));
1617         if (tmp == NULL) {
1618 	    ctxt->nameMax /= 2;
1619 	    goto mem_error;
1620         }
1621 	ctxt->nameTab = tmp;
1622         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1623                                     ctxt->nameMax * 3 *
1624                                     sizeof(ctxt->pushTab[0]));
1625         if (tmp2 == NULL) {
1626 	    ctxt->nameMax /= 2;
1627 	    goto mem_error;
1628         }
1629 	ctxt->pushTab = tmp2;
1630     }
1631     ctxt->nameTab[ctxt->nameNr] = value;
1632     ctxt->name = value;
1633     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1634     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1635     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1636     return (ctxt->nameNr++);
1637 mem_error:
1638     xmlErrMemory(ctxt, NULL);
1639     return (-1);
1640 }
1641 /**
1642  * nameNsPop:
1643  * @ctxt: an XML parser context
1644  *
1645  * Pops the top element/prefix/URI name from the name stack
1646  *
1647  * Returns the name just removed
1648  */
1649 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1650 nameNsPop(xmlParserCtxtPtr ctxt)
1651 {
1652     const xmlChar *ret;
1653 
1654     if (ctxt->nameNr <= 0)
1655         return (NULL);
1656     ctxt->nameNr--;
1657     if (ctxt->nameNr > 0)
1658         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1659     else
1660         ctxt->name = NULL;
1661     ret = ctxt->nameTab[ctxt->nameNr];
1662     ctxt->nameTab[ctxt->nameNr] = NULL;
1663     return (ret);
1664 }
1665 #endif /* LIBXML_PUSH_ENABLED */
1666 
1667 /**
1668  * namePush:
1669  * @ctxt:  an XML parser context
1670  * @value:  the element name
1671  *
1672  * Pushes a new element name on top of the name stack
1673  *
1674  * Returns -1 in case of error, the index in the stack otherwise
1675  */
1676 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1677 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1678 {
1679     if (ctxt == NULL) return (-1);
1680 
1681     if (ctxt->nameNr >= ctxt->nameMax) {
1682         const xmlChar * *tmp;
1683         ctxt->nameMax *= 2;
1684         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1685                                     ctxt->nameMax *
1686                                     sizeof(ctxt->nameTab[0]));
1687         if (tmp == NULL) {
1688 	    ctxt->nameMax /= 2;
1689 	    goto mem_error;
1690         }
1691 	ctxt->nameTab = tmp;
1692     }
1693     ctxt->nameTab[ctxt->nameNr] = value;
1694     ctxt->name = value;
1695     return (ctxt->nameNr++);
1696 mem_error:
1697     xmlErrMemory(ctxt, NULL);
1698     return (-1);
1699 }
1700 /**
1701  * namePop:
1702  * @ctxt: an XML parser context
1703  *
1704  * Pops the top element name from the name stack
1705  *
1706  * Returns the name just removed
1707  */
1708 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1709 namePop(xmlParserCtxtPtr ctxt)
1710 {
1711     const xmlChar *ret;
1712 
1713     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1714         return (NULL);
1715     ctxt->nameNr--;
1716     if (ctxt->nameNr > 0)
1717         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1718     else
1719         ctxt->name = NULL;
1720     ret = ctxt->nameTab[ctxt->nameNr];
1721     ctxt->nameTab[ctxt->nameNr] = NULL;
1722     return (ret);
1723 }
1724 
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top slot. The stack is doubled when full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 *         otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1743 
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * slot below, or to the first slot of the table when the stack becomes
 * empty, and the vacated slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1756 
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them:
 *
 *   CUR_PTR  returns the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      returns the current xmlChar value, i.e. an 8 bit value.
 *            This should be used internally by the parser only to compare
 *            to ASCII values, otherwise it would break when running with
 *            UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR, it should be used
 *            only to compare against ASCII based substrings.
 *   SKIP(n)  skips n xmlChar, and must also be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   NEXT1    skips 1 xmlChar, and must also be used only to skip 1
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8 encoding:
 *
 *   NEXT     skips to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context.
 *   COPY_BUF  copies the current unicode char to the target buffer and
 *            increments the index.
 *   GROW, SHRINK  handling of input buffers.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1796 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn, compared as unsigned char from left to right; the
 * comparison stops at the first mismatch.
 * Every argument is parenthesized in the expansion so that an expression
 * such as "ptr + 1" is converted as a whole by the cast rather than
 * having the cast apply to its first operand only. Note that s is
 * evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1814 
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the character and column counters (the line counter is not touched).
 * A parameter entity reference is then handled if the new current char
 * is '%', and the input is popped if it is exhausted and cannot grow.
 * val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1822 
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line and column counters right when newlines are crossed.
 * A parameter entity reference is then handled if the new current char
 * is '%', and the input is popped if it is exhausted and cannot grow.
 * val is parenthesized so that any expression can be passed; it is
 * evaluated at each iteration of the loop.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < (val); skipl++) {                           \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++;                                        \
            ctxt->input->col = 1;                                       \
        } else {                                                        \
            ctxt->input->col++;                                         \
        }                                                               \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1837 
1838 #define SHRINK if ((ctxt->progressive == 0) &&				\
1839 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1840 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1841 	xmlSHRINK (ctxt);
1842 
xmlSHRINK(xmlParserCtxtPtr ctxt)1843 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1844     xmlParserInputShrink(ctxt->input);
1845     if ((*ctxt->input->cur == 0) &&
1846         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1847 	    xmlPopInput(ctxt);
1848   }
1849 
1850 #define GROW if ((ctxt->progressive == 0) &&				\
1851 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
1852 	xmlGROW (ctxt);
1853 
xmlGROW(xmlParserCtxtPtr ctxt)1854 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1855     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1856     if ((*ctxt->input->cur == 0) &&
1857         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1858 	    xmlPopInput(ctxt);
1859 }
1860 
/* SKIP_BLANKS: skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance the current input through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the current input by exactly one byte, bumping column and
 * character counter, and ask for more input if the new position holds a
 * 0 byte.  No newline accounting and no input popping is done here.
 * NOTE: expands to a brace block, not to a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL: advance the current input by l bytes, treated as one character:
 * a '\n' under the cursor bumps the line and resets the column to 1,
 * anything else bumps the column by one.  A '%' at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR: current character of the input, its byte length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR: same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF: append the character v to buffer b at index i and advance i.
 * When l is 1 the value is stored as a single byte, otherwise
 * xmlCopyCharMultiByte() does the encoding and returns the byte count.
 * All arguments are parenthesized so that expressions can be passed; i is
 * modified and must be an lvalue.
 * NOTE: expands to a bare if/else statement, keep call sites braced.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1887 
1888 /**
1889  * xmlSkipBlankChars:
1890  * @ctxt:  the XML parser context
1891  *
1892  * skip all blanks character found at that point in the input streams.
1893  * It pops up finished entities in the process if allowable at that point.
1894  *
1895  * Returns the number of space chars skipped
1896  */
1897 
1898 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)1899 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1900     int res = 0;
1901 
1902     /*
1903      * It's Okay to use CUR/NEXT here since all the blanks are on
1904      * the ASCII range.
1905      */
1906     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1907 	const xmlChar *cur;
1908 	/*
1909 	 * if we are in the document content, go really fast
1910 	 */
1911 	cur = ctxt->input->cur;
1912 	while (IS_BLANK_CH(*cur)) {
1913 	    if (*cur == '\n') {
1914 		ctxt->input->line++; ctxt->input->col = 1;
1915 	    }
1916 	    cur++;
1917 	    res++;
1918 	    if (*cur == 0) {
1919 		ctxt->input->cur = cur;
1920 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1921 		cur = ctxt->input->cur;
1922 	    }
1923 	}
1924 	ctxt->input->cur = cur;
1925     } else {
1926 	int cur;
1927 	do {
1928 	    cur = CUR;
1929 	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1930 		NEXT;
1931 		cur = CUR;
1932 		res++;
1933 	    }
1934 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
1935 		   (ctxt->instate != XML_PARSER_COMMENT)) {
1936 		xmlPopInput(ctxt);
1937 		cur = CUR;
1938 	    }
1939 	    /*
1940 	     * Need to handle support of entities branching here
1941 	     */
1942 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1943 	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1944     }
1945     return(res);
1946 }
1947 
1948 /************************************************************************
1949  *									*
1950  *		Commodity functions to handle entities			*
1951  *									*
1952  ************************************************************************/
1953 
1954 /**
1955  * xmlPopInput:
1956  * @ctxt:  an XML parser context
1957  *
1958  * xmlPopInput: the current input pointed by ctxt->input came to an end
1959  *          pop it and return the next char.
1960  *
1961  * Returns the current xmlChar in the parser context
1962  */
1963 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)1964 xmlPopInput(xmlParserCtxtPtr ctxt) {
1965     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1966     if (xmlParserDebugEntities)
1967 	xmlGenericError(xmlGenericErrorContext,
1968 		"Popping input %d\n", ctxt->inputNr);
1969     xmlFreeInputStream(inputPop(ctxt));
1970     if ((*ctxt->input->cur == 0) &&
1971         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1972 	    return(xmlPopInput(ctxt));
1973     return(CUR);
1974 }
1975 
1976 /**
1977  * xmlPushInput:
1978  * @ctxt:  an XML parser context
1979  * @input:  an XML parser input fragment (entity, XML fragment ...).
1980  *
1981  * xmlPushInput: switch to a new input stream which is stacked on top
1982  *               of the previous one(s).
1983  * Returns -1 in case of error or the index in the input stack
1984  */
1985 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)1986 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1987     int ret;
1988     if (input == NULL) return(-1);
1989 
1990     if (xmlParserDebugEntities) {
1991 	if ((ctxt->input != NULL) && (ctxt->input->filename))
1992 	    xmlGenericError(xmlGenericErrorContext,
1993 		    "%s(%d): ", ctxt->input->filename,
1994 		    ctxt->input->line);
1995 	xmlGenericError(xmlGenericErrorContext,
1996 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1997     }
1998     ret = inputPush(ctxt, input);
1999     GROW;
2000     return(ret);
2001 }
2002 
2003 /**
2004  * xmlParseCharRef:
2005  * @ctxt:  an XML parser context
2006  *
2007  * parse Reference declarations
2008  *
2009  * [66] CharRef ::= '&#' [0-9]+ ';' |
2010  *                  '&#x' [0-9a-fA-F]+ ';'
2011  *
2012  * [ WFC: Legal Character ]
2013  * Characters referred to using character references must match the
2014  * production for Char.
2015  *
2016  * Returns the value parsed (as an int), 0 in case of error
2017  */
2018 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2019 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2020     unsigned int val = 0;
2021     int count = 0;
2022     unsigned int outofrange = 0;
2023 
2024     /*
2025      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2026      */
2027     if ((RAW == '&') && (NXT(1) == '#') &&
2028         (NXT(2) == 'x')) {
2029 	SKIP(3);
2030 	GROW;
2031 	while (RAW != ';') { /* loop blocked by count */
2032 	    if (count++ > 20) {
2033 		count = 0;
2034 		GROW;
2035 	    }
2036 	    if ((RAW >= '0') && (RAW <= '9'))
2037 	        val = val * 16 + (CUR - '0');
2038 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2039 	        val = val * 16 + (CUR - 'a') + 10;
2040 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2041 	        val = val * 16 + (CUR - 'A') + 10;
2042 	    else {
2043 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2044 		val = 0;
2045 		break;
2046 	    }
2047 	    if (val > 0x10FFFF)
2048 	        outofrange = val;
2049 
2050 	    NEXT;
2051 	    count++;
2052 	}
2053 	if (RAW == ';') {
2054 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2055 	    ctxt->input->col++;
2056 	    ctxt->nbChars ++;
2057 	    ctxt->input->cur++;
2058 	}
2059     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2060 	SKIP(2);
2061 	GROW;
2062 	while (RAW != ';') { /* loop blocked by count */
2063 	    if (count++ > 20) {
2064 		count = 0;
2065 		GROW;
2066 	    }
2067 	    if ((RAW >= '0') && (RAW <= '9'))
2068 	        val = val * 10 + (CUR - '0');
2069 	    else {
2070 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2071 		val = 0;
2072 		break;
2073 	    }
2074 	    if (val > 0x10FFFF)
2075 	        outofrange = val;
2076 
2077 	    NEXT;
2078 	    count++;
2079 	}
2080 	if (RAW == ';') {
2081 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2082 	    ctxt->input->col++;
2083 	    ctxt->nbChars ++;
2084 	    ctxt->input->cur++;
2085 	}
2086     } else {
2087         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2088     }
2089 
2090     /*
2091      * [ WFC: Legal Character ]
2092      * Characters referred to using character references must match the
2093      * production for Char.
2094      */
2095     if ((IS_CHAR(val) && (outofrange == 0))) {
2096         return(val);
2097     } else {
2098         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2099                           "xmlParseCharRef: invalid xmlChar value %d\n",
2100 	                  val);
2101     }
2102     return(0);
2103 }
2104 
2105 /**
2106  * xmlParseStringCharRef:
2107  * @ctxt:  an XML parser context
2108  * @str:  a pointer to an index in the string
2109  *
2110  * parse Reference declarations, variant parsing from a string rather
2111  * than an an input flow.
2112  *
2113  * [66] CharRef ::= '&#' [0-9]+ ';' |
2114  *                  '&#x' [0-9a-fA-F]+ ';'
2115  *
2116  * [ WFC: Legal Character ]
2117  * Characters referred to using character references must match the
2118  * production for Char.
2119  *
2120  * Returns the value parsed (as an int), 0 in case of error, str will be
2121  *         updated to the current value of the index
2122  */
2123 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2124 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2125     const xmlChar *ptr;
2126     xmlChar cur;
2127     unsigned int val = 0;
2128     unsigned int outofrange = 0;
2129 
2130     if ((str == NULL) || (*str == NULL)) return(0);
2131     ptr = *str;
2132     cur = *ptr;
2133     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2134 	ptr += 3;
2135 	cur = *ptr;
2136 	while (cur != ';') { /* Non input consuming loop */
2137 	    if ((cur >= '0') && (cur <= '9'))
2138 	        val = val * 16 + (cur - '0');
2139 	    else if ((cur >= 'a') && (cur <= 'f'))
2140 	        val = val * 16 + (cur - 'a') + 10;
2141 	    else if ((cur >= 'A') && (cur <= 'F'))
2142 	        val = val * 16 + (cur - 'A') + 10;
2143 	    else {
2144 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2145 		val = 0;
2146 		break;
2147 	    }
2148 	    if (val > 0x10FFFF)
2149 	        outofrange = val;
2150 
2151 	    ptr++;
2152 	    cur = *ptr;
2153 	}
2154 	if (cur == ';')
2155 	    ptr++;
2156     } else if  ((cur == '&') && (ptr[1] == '#')){
2157 	ptr += 2;
2158 	cur = *ptr;
2159 	while (cur != ';') { /* Non input consuming loops */
2160 	    if ((cur >= '0') && (cur <= '9'))
2161 	        val = val * 10 + (cur - '0');
2162 	    else {
2163 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2164 		val = 0;
2165 		break;
2166 	    }
2167 	    if (val > 0x10FFFF)
2168 	        outofrange = val;
2169 
2170 	    ptr++;
2171 	    cur = *ptr;
2172 	}
2173 	if (cur == ';')
2174 	    ptr++;
2175     } else {
2176 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2177 	return(0);
2178     }
2179     *str = ptr;
2180 
2181     /*
2182      * [ WFC: Legal Character ]
2183      * Characters referred to using character references must match the
2184      * production for Char.
2185      */
2186     if ((IS_CHAR(val) && (outofrange == 0))) {
2187         return(val);
2188     } else {
2189         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2190 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2191 			  val);
2192     }
2193     return(0);
2194 }
2195 
2196 /**
2197  * xmlNewBlanksWrapperInputStream:
2198  * @ctxt:  an XML parser context
2199  * @entity:  an Entity pointer
2200  *
2201  * Create a new input stream for wrapping
2202  * blanks around a PEReference
2203  *
2204  * Returns the new input stream or NULL
2205  */
2206 
deallocblankswrapper(xmlChar * str)2207 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2208 
2209 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2210 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2211     xmlParserInputPtr input;
2212     xmlChar *buffer;
2213     size_t length;
2214     if (entity == NULL) {
2215 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2216 	            "xmlNewBlanksWrapperInputStream entity\n");
2217 	return(NULL);
2218     }
2219     if (xmlParserDebugEntities)
2220 	xmlGenericError(xmlGenericErrorContext,
2221 		"new blanks wrapper for entity: %s\n", entity->name);
2222     input = xmlNewInputStream(ctxt);
2223     if (input == NULL) {
2224 	return(NULL);
2225     }
2226     length = xmlStrlen(entity->name) + 5;
2227     buffer = xmlMallocAtomic(length);
2228     if (buffer == NULL) {
2229 	xmlErrMemory(ctxt, NULL);
2230         xmlFree(input);
2231     	return(NULL);
2232     }
2233     buffer [0] = ' ';
2234     buffer [1] = '%';
2235     buffer [length-3] = ';';
2236     buffer [length-2] = ' ';
2237     buffer [length-1] = 0;
2238     memcpy(buffer + 2, entity->name, length - 5);
2239     input->free = deallocblankswrapper;
2240     input->base = buffer;
2241     input->cur = buffer;
2242     input->length = length;
2243     input->end = &buffer[length];
2244     return(input);
2245 }
2246 
2247 /**
2248  * xmlParserHandlePEReference:
2249  * @ctxt:  the parser context
2250  *
2251  * [69] PEReference ::= '%' Name ';'
2252  *
2253  * [ WFC: No Recursion ]
2254  * A parsed entity must not contain a recursive
2255  * reference to itself, either directly or indirectly.
2256  *
2257  * [ WFC: Entity Declared ]
2258  * In a document without any DTD, a document with only an internal DTD
2259  * subset which contains no parameter entity references, or a document
2260  * with "standalone='yes'", ...  ... The declaration of a parameter
2261  * entity must precede any reference to it...
2262  *
2263  * [ VC: Entity Declared ]
2264  * In a document with an external subset or external parameter entities
2265  * with "standalone='no'", ...  ... The declaration of a parameter entity
2266  * must precede any reference to it...
2267  *
2268  * [ WFC: In DTD ]
2269  * Parameter-entity references may only appear in the DTD.
2270  * NOTE: misleading but this is handled.
2271  *
2272  * A PEReference may have been detected in the current input stream
2273  * the handling is done accordingly to
2274  *      http://www.w3.org/TR/REC-xml#entproc
2275  * i.e.
2276  *   - Included in literal in entity values
2277  *   - Included as Parameter Entity reference within DTDs
2278  */
2279 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2280 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2281     const xmlChar *name;
2282     xmlEntityPtr entity = NULL;
2283     xmlParserInputPtr input;
2284 
2285     if (RAW != '%') return;
2286     switch(ctxt->instate) {
2287 	case XML_PARSER_CDATA_SECTION:
2288 	    return;
2289         case XML_PARSER_COMMENT:
2290 	    return;
2291 	case XML_PARSER_START_TAG:
2292 	    return;
2293 	case XML_PARSER_END_TAG:
2294 	    return;
2295         case XML_PARSER_EOF:
2296 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2297 	    return;
2298         case XML_PARSER_PROLOG:
2299 	case XML_PARSER_START:
2300 	case XML_PARSER_MISC:
2301 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2302 	    return;
2303 	case XML_PARSER_ENTITY_DECL:
2304         case XML_PARSER_CONTENT:
2305         case XML_PARSER_ATTRIBUTE_VALUE:
2306         case XML_PARSER_PI:
2307 	case XML_PARSER_SYSTEM_LITERAL:
2308 	case XML_PARSER_PUBLIC_LITERAL:
2309 	    /* we just ignore it there */
2310 	    return;
2311         case XML_PARSER_EPILOG:
2312 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2313 	    return;
2314 	case XML_PARSER_ENTITY_VALUE:
2315 	    /*
2316 	     * NOTE: in the case of entity values, we don't do the
2317 	     *       substitution here since we need the literal
2318 	     *       entity value to be able to save the internal
2319 	     *       subset of the document.
2320 	     *       This will be handled by xmlStringDecodeEntities
2321 	     */
2322 	    return;
2323         case XML_PARSER_DTD:
2324 	    /*
2325 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2326 	     * In the internal DTD subset, parameter-entity references
2327 	     * can occur only where markup declarations can occur, not
2328 	     * within markup declarations.
2329 	     * In that case this is handled in xmlParseMarkupDecl
2330 	     */
2331 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2332 		return;
2333 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2334 		return;
2335             break;
2336         case XML_PARSER_IGNORE:
2337             return;
2338     }
2339 
2340     NEXT;
2341     name = xmlParseName(ctxt);
2342     if (xmlParserDebugEntities)
2343 	xmlGenericError(xmlGenericErrorContext,
2344 		"PEReference: %s\n", name);
2345     if (name == NULL) {
2346 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2347     } else {
2348 	if (RAW == ';') {
2349 	    NEXT;
2350 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2351 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2352 	    if (entity == NULL) {
2353 
2354 		/*
2355 		 * [ WFC: Entity Declared ]
2356 		 * In a document without any DTD, a document with only an
2357 		 * internal DTD subset which contains no parameter entity
2358 		 * references, or a document with "standalone='yes'", ...
2359 		 * ... The declaration of a parameter entity must precede
2360 		 * any reference to it...
2361 		 */
2362 		if ((ctxt->standalone == 1) ||
2363 		    ((ctxt->hasExternalSubset == 0) &&
2364 		     (ctxt->hasPErefs == 0))) {
2365 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2366 			 "PEReference: %%%s; not found\n", name);
2367 	        } else {
2368 		    /*
2369 		     * [ VC: Entity Declared ]
2370 		     * In a document with an external subset or external
2371 		     * parameter entities with "standalone='no'", ...
2372 		     * ... The declaration of a parameter entity must precede
2373 		     * any reference to it...
2374 		     */
2375 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2376 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 			                 "PEReference: %%%s; not found\n",
2378 				         name, NULL);
2379 		    } else
2380 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2381 			              "PEReference: %%%s; not found\n",
2382 				      name, NULL);
2383 		    ctxt->valid = 0;
2384 		}
2385 	    } else if (ctxt->input->free != deallocblankswrapper) {
2386 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2387 		    if (xmlPushInput(ctxt, input) < 0)
2388 		        return;
2389 	    } else {
2390 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2391 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2392 		    xmlChar start[4];
2393 		    xmlCharEncoding enc;
2394 
2395 		    /*
2396 		     * handle the extra spaces added before and after
2397 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2398 		     * this is done independently.
2399 		     */
2400 		    input = xmlNewEntityInputStream(ctxt, entity);
2401 		    if (xmlPushInput(ctxt, input) < 0)
2402 		        return;
2403 
2404 		    /*
2405 		     * Get the 4 first bytes and decode the charset
2406 		     * if enc != XML_CHAR_ENCODING_NONE
2407 		     * plug some encoding conversion routines.
2408 		     * Note that, since we may have some non-UTF8
2409 		     * encoding (like UTF16, bug 135229), the 'length'
2410 		     * is not known, but we can calculate based upon
2411 		     * the amount of data in the buffer.
2412 		     */
2413 		    GROW
2414 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2415 			start[0] = RAW;
2416 			start[1] = NXT(1);
2417 			start[2] = NXT(2);
2418 			start[3] = NXT(3);
2419 			enc = xmlDetectCharEncoding(start, 4);
2420 			if (enc != XML_CHAR_ENCODING_NONE) {
2421 			    xmlSwitchEncoding(ctxt, enc);
2422 			}
2423 		    }
2424 
2425 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2426 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2427 			(IS_BLANK_CH(NXT(5)))) {
2428 			xmlParseTextDecl(ctxt);
2429 		    }
2430 		} else {
2431 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2432 			     "PEReference: %s is not a parameter entity\n",
2433 				      name);
2434 		}
2435 	    }
2436 	} else {
2437 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2438 	}
2439     }
2440 }
2441 
/*
 * Macro used to grow the current buffer.
 *
 * The new capacity is twice the old one plus n. The macro relies on a
 * companion variable named <buffer>_size holding the capacity and on a
 * mem_error label in the enclosing function, which is jumped to when the
 * reallocation fails (the old block is still referenced by buffer then).
 * NOTE: expands to a brace block, not to a do/while statement.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size = buffer##_size * 2 + (n);				\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2454 
2455 /**
2456  * xmlStringLenDecodeEntities:
2457  * @ctxt:  the parser context
2458  * @str:  the input string
2459  * @len: the string length
2460  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2461  * @end:  an end marker xmlChar, 0 if none
2462  * @end2:  an end marker xmlChar, 0 if none
2463  * @end3:  an end marker xmlChar, 0 if none
2464  *
2465  * Takes a entity string content and process to do the adequate substitutions.
2466  *
2467  * [67] Reference ::= EntityRef | CharRef
2468  *
2469  * [69] PEReference ::= '%' Name ';'
2470  *
2471  * Returns A newly allocated string with the substitution done. The caller
2472  *      must deallocate it !
2473  */
2474 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2475 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2476 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2477     xmlChar *buffer = NULL;
2478     int buffer_size = 0;
2479 
2480     xmlChar *current = NULL;
2481     xmlChar *rep = NULL;
2482     const xmlChar *last;
2483     xmlEntityPtr ent;
2484     int c,l;
2485     int nbchars = 0;
2486 
2487     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2488 	return(NULL);
2489     last = str + len;
2490 
2491     if (((ctxt->depth > 40) &&
2492          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2493 	(ctxt->depth > 1024)) {
2494 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2495 	return(NULL);
2496     }
2497 
2498     /*
2499      * allocate a translation buffer.
2500      */
2501     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2502     buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2503     if (buffer == NULL) goto mem_error;
2504 
2505     /*
2506      * OK loop until we reach one of the ending char or a size limit.
2507      * we are operating on already parsed values.
2508      */
2509     if (str < last)
2510 	c = CUR_SCHAR(str, l);
2511     else
2512         c = 0;
2513     while ((c != 0) && (c != end) && /* non input consuming loop */
2514 	   (c != end2) && (c != end3)) {
2515 
2516 	if (c == 0) break;
2517         if ((c == '&') && (str[1] == '#')) {
2518 	    int val = xmlParseStringCharRef(ctxt, &str);
2519 	    if (val != 0) {
2520 		COPY_BUF(0,buffer,nbchars,val);
2521 	    }
2522 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2523 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2524 	    }
2525 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2526 	    if (xmlParserDebugEntities)
2527 		xmlGenericError(xmlGenericErrorContext,
2528 			"String decoding Entity Reference: %.30s\n",
2529 			str);
2530 	    ent = xmlParseStringEntityRef(ctxt, &str);
2531 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2532 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2533 	        goto int_error;
2534 	    if (ent != NULL)
2535 	        ctxt->nbentities += ent->checked;
2536 	    if ((ent != NULL) &&
2537 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2538 		if (ent->content != NULL) {
2539 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2540 		    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2541 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2542 		    }
2543 		} else {
2544 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2545 			    "predefined entity has no content\n");
2546 		}
2547 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2548 		ctxt->depth++;
2549 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2550 			                      0, 0, 0);
2551 		ctxt->depth--;
2552 
2553 		if (rep != NULL) {
2554 		    current = rep;
2555 		    while (*current != 0) { /* non input consuming loop */
2556 			buffer[nbchars++] = *current++;
2557 			if (nbchars >
2558 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2559 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2560 				goto int_error;
2561 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2562 			}
2563 		    }
2564 		    xmlFree(rep);
2565 		    rep = NULL;
2566 		}
2567 	    } else if (ent != NULL) {
2568 		int i = xmlStrlen(ent->name);
2569 		const xmlChar *cur = ent->name;
2570 
2571 		buffer[nbchars++] = '&';
2572 		if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2573 		    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2574 		}
2575 		for (;i > 0;i--)
2576 		    buffer[nbchars++] = *cur++;
2577 		buffer[nbchars++] = ';';
2578 	    }
2579 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2580 	    if (xmlParserDebugEntities)
2581 		xmlGenericError(xmlGenericErrorContext,
2582 			"String decoding PE Reference: %.30s\n", str);
2583 	    ent = xmlParseStringPEReference(ctxt, &str);
2584 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2585 	        goto int_error;
2586 	    if (ent != NULL)
2587 	        ctxt->nbentities += ent->checked;
2588 	    if (ent != NULL) {
2589                 if (ent->content == NULL) {
2590 		    xmlLoadEntityContent(ctxt, ent);
2591 		}
2592 		ctxt->depth++;
2593 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2594 			                      0, 0, 0);
2595 		ctxt->depth--;
2596 		if (rep != NULL) {
2597 		    current = rep;
2598 		    while (*current != 0) { /* non input consuming loop */
2599 			buffer[nbchars++] = *current++;
2600 			if (nbchars >
2601 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2602 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2603 			        goto int_error;
2604 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2605 			}
2606 		    }
2607 		    xmlFree(rep);
2608 		    rep = NULL;
2609 		}
2610 	    }
2611 	} else {
2612 	    COPY_BUF(l,buffer,nbchars,c);
2613 	    str += l;
2614 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2615 	      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2616 	    }
2617 	}
2618 	if (str < last)
2619 	    c = CUR_SCHAR(str, l);
2620 	else
2621 	    c = 0;
2622     }
2623     buffer[nbchars++] = 0;
2624     return(buffer);
2625 
2626 mem_error:
2627     xmlErrMemory(ctxt, NULL);
2628 int_error:
2629     if (rep != NULL)
2630         xmlFree(rep);
2631     if (buffer != NULL)
2632         xmlFree(buffer);
2633     return(NULL);
2634 }
2635 
2636 /**
2637  * xmlStringDecodeEntities:
2638  * @ctxt:  the parser context
2639  * @str:  the input string
2640  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2641  * @end:  an end marker xmlChar, 0 if none
2642  * @end2:  an end marker xmlChar, 0 if none
2643  * @end3:  an end marker xmlChar, 0 if none
2644  *
2645  * Takes a entity string content and process to do the adequate substitutions.
2646  *
2647  * [67] Reference ::= EntityRef | CharRef
2648  *
2649  * [69] PEReference ::= '%' Name ';'
2650  *
2651  * Returns A newly allocated string with the substitution done. The caller
2652  *      must deallocate it !
2653  */
2654 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2655 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2656 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2657     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2658     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2659            end, end2, end3));
2660 }
2661 
2662 /************************************************************************
2663  *									*
2664  *		Commodity functions, cleanup needed ?			*
2665  *									*
2666  ************************************************************************/
2667 
2668 /**
2669  * areBlanks:
2670  * @ctxt:  an XML parser context
2671  * @str:  a xmlChar *
2672  * @len:  the size of @str
2673  * @blank_chars: we know the chars are blanks
2674  *
2675  * Is this a sequence of blank chars that one can ignore ?
2676  *
2677  * Returns 1 if ignorable 0 otherwise.
2678  */
2679 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2680 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2681                      int blank_chars) {
2682     int i, ret;
2683     xmlNodePtr lastChild;
2684 
2685     /*
2686      * Don't spend time trying to differentiate them, the same callback is
2687      * used !
2688      */
2689     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2690 	return(0);
2691 
2692     /*
2693      * Check for xml:space value.
2694      */
2695     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2696         (*(ctxt->space) == -2))
2697 	return(0);
2698 
2699     /*
2700      * Check that the string is made of blanks
2701      */
2702     if (blank_chars == 0) {
2703 	for (i = 0;i < len;i++)
2704 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2705     }
2706 
2707     /*
2708      * Look if the element is mixed content in the DTD if available
2709      */
2710     if (ctxt->node == NULL) return(0);
2711     if (ctxt->myDoc != NULL) {
2712 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2713         if (ret == 0) return(1);
2714         if (ret == 1) return(0);
2715     }
2716 
2717     /*
2718      * Otherwise, heuristic :-\
2719      */
2720     if ((RAW != '<') && (RAW != 0xD)) return(0);
2721     if ((ctxt->node->children == NULL) &&
2722 	(RAW == '<') && (NXT(1) == '/')) return(0);
2723 
2724     lastChild = xmlGetLastChild(ctxt->node);
2725     if (lastChild == NULL) {
2726         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2727             (ctxt->node->content != NULL)) return(0);
2728     } else if (xmlNodeIsText(lastChild))
2729         return(0);
2730     else if ((ctxt->node->children != NULL) &&
2731              (xmlNodeIsText(ctxt->node->children)))
2732         return(0);
2733     return(1);
2734 }
2735 
2736 /************************************************************************
2737  *									*
2738  *		Extra stuff for namespace support			*
2739  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2740  *									*
2741  ************************************************************************/
2742 
2743 /**
2744  * xmlSplitQName:
2745  * @ctxt:  an XML parser context
2746  * @name:  the UTF8 encoded qualified name string to split
2747  * @prefix:  a xmlChar **
2748  *
2749  * parse an UTF8 encoded XML qualified name string
2750  *
2751  * [NS 5] QName ::= (Prefix ':')? LocalPart
2752  *
2753  * [NS 6] Prefix ::= NCName
2754  *
2755  * [NS 7] LocalPart ::= NCName
2756  *
2757  * Returns the local part, and prefix is updated
2758  *   to get the Prefix if any.
2759  */
2760 
2761 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2762 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2763     xmlChar buf[XML_MAX_NAMELEN + 5];
2764     xmlChar *buffer = NULL;
2765     int len = 0;
2766     int max = XML_MAX_NAMELEN;
2767     xmlChar *ret = NULL;
2768     const xmlChar *cur = name;
2769     int c;
2770 
2771     if (prefix == NULL) return(NULL);
2772     *prefix = NULL;
2773 
2774     if (cur == NULL) return(NULL);
2775 
2776 #ifndef XML_XML_NAMESPACE
2777     /* xml: prefix is not really a namespace */
2778     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2779         (cur[2] == 'l') && (cur[3] == ':'))
2780 	return(xmlStrdup(name));
2781 #endif
2782 
2783     /* nasty but well=formed */
2784     if (cur[0] == ':')
2785 	return(xmlStrdup(name));
2786 
2787     c = *cur++;
2788     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2789 	buf[len++] = c;
2790 	c = *cur++;
2791     }
2792     if (len >= max) {
2793 	/*
2794 	 * Okay someone managed to make a huge name, so he's ready to pay
2795 	 * for the processing speed.
2796 	 */
2797 	max = len * 2;
2798 
2799 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2800 	if (buffer == NULL) {
2801 	    xmlErrMemory(ctxt, NULL);
2802 	    return(NULL);
2803 	}
2804 	memcpy(buffer, buf, len);
2805 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2806 	    if (len + 10 > max) {
2807 	        xmlChar *tmp;
2808 
2809 		max *= 2;
2810 		tmp = (xmlChar *) xmlRealloc(buffer,
2811 						max * sizeof(xmlChar));
2812 		if (tmp == NULL) {
2813 		    xmlFree(buffer);
2814 		    xmlErrMemory(ctxt, NULL);
2815 		    return(NULL);
2816 		}
2817 		buffer = tmp;
2818 	    }
2819 	    buffer[len++] = c;
2820 	    c = *cur++;
2821 	}
2822 	buffer[len] = 0;
2823     }
2824 
2825     if ((c == ':') && (*cur == 0)) {
2826         if (buffer != NULL)
2827 	    xmlFree(buffer);
2828 	*prefix = NULL;
2829 	return(xmlStrdup(name));
2830     }
2831 
2832     if (buffer == NULL)
2833 	ret = xmlStrndup(buf, len);
2834     else {
2835 	ret = buffer;
2836 	buffer = NULL;
2837 	max = XML_MAX_NAMELEN;
2838     }
2839 
2840 
2841     if (c == ':') {
2842 	c = *cur;
2843         *prefix = ret;
2844 	if (c == 0) {
2845 	    return(xmlStrndup(BAD_CAST "", 0));
2846 	}
2847 	len = 0;
2848 
2849 	/*
2850 	 * Check that the first character is proper to start
2851 	 * a new name
2852 	 */
2853 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
2854 	      ((c >= 0x41) && (c <= 0x5A)) ||
2855 	      (c == '_') || (c == ':'))) {
2856 	    int l;
2857 	    int first = CUR_SCHAR(cur, l);
2858 
2859 	    if (!IS_LETTER(first) && (first != '_')) {
2860 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2861 			    "Name %s is not XML Namespace compliant\n",
2862 				  name);
2863 	    }
2864 	}
2865 	cur++;
2866 
2867 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2868 	    buf[len++] = c;
2869 	    c = *cur++;
2870 	}
2871 	if (len >= max) {
2872 	    /*
2873 	     * Okay someone managed to make a huge name, so he's ready to pay
2874 	     * for the processing speed.
2875 	     */
2876 	    max = len * 2;
2877 
2878 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2879 	    if (buffer == NULL) {
2880 	        xmlErrMemory(ctxt, NULL);
2881 		return(NULL);
2882 	    }
2883 	    memcpy(buffer, buf, len);
2884 	    while (c != 0) { /* tested bigname2.xml */
2885 		if (len + 10 > max) {
2886 		    xmlChar *tmp;
2887 
2888 		    max *= 2;
2889 		    tmp = (xmlChar *) xmlRealloc(buffer,
2890 						    max * sizeof(xmlChar));
2891 		    if (tmp == NULL) {
2892 			xmlErrMemory(ctxt, NULL);
2893 			xmlFree(buffer);
2894 			return(NULL);
2895 		    }
2896 		    buffer = tmp;
2897 		}
2898 		buffer[len++] = c;
2899 		c = *cur++;
2900 	    }
2901 	    buffer[len] = 0;
2902 	}
2903 
2904 	if (buffer == NULL)
2905 	    ret = xmlStrndup(buf, len);
2906 	else {
2907 	    ret = buffer;
2908 	}
2909     }
2910 
2911     return(ret);
2912 }
2913 
2914 /************************************************************************
2915  *									*
2916  *			The parser itself				*
2917  *	Relates to http://www.w3.org/TR/REC-xml				*
2918  *									*
2919  ************************************************************************/
2920 
2921 /************************************************************************
2922  *									*
2923  *	Routines to parse Name, NCName and NmToken			*
2924  *									*
2925  ************************************************************************/
/*
 * Call counters for the name parsing routines below; each one is
 * incremented on entry to the routine it is named after.
 */
/* fast-path entry points */
unsigned long nbParseName = 0;
unsigned long nbParseNCName = 0;
/* fallback ("complex") routines */
unsigned long nbParseNameComplex = 0;
unsigned long nbParseNCNameComplex = 0;
/* Nmtoken and string based parsing */
unsigned long nbParseNmToken = 0;
unsigned long nbParseStringName = 0;
2932 /*
2933  * The two following functions are related to the change of accepted
2934  * characters for Name and NmToken in the Revision 5 of XML-1.0
2935  * They correspond to the modified production [4] and the new production [4a]
2936  * changes in that revision. Also note that the macros used for the
2937  * productions Letter, Digit, CombiningChar and Extender are not needed
2938  * anymore.
2939  * We still keep compatibility to pre-revision5 parsing semantic if the
2940  * new XML_PARSE_OLD10 option is given to the parser.
2941  */
2942 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)2943 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2944     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2945         /*
2946 	 * Use the new checks of production [4] [4a] amd [5] of the
2947 	 * Update 5 of XML-1.0
2948 	 */
2949 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2950 	    (((c >= 'a') && (c <= 'z')) ||
2951 	     ((c >= 'A') && (c <= 'Z')) ||
2952 	     (c == '_') || (c == ':') ||
2953 	     ((c >= 0xC0) && (c <= 0xD6)) ||
2954 	     ((c >= 0xD8) && (c <= 0xF6)) ||
2955 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
2956 	     ((c >= 0x370) && (c <= 0x37D)) ||
2957 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
2958 	     ((c >= 0x200C) && (c <= 0x200D)) ||
2959 	     ((c >= 0x2070) && (c <= 0x218F)) ||
2960 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2961 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
2962 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
2963 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2964 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
2965 	    return(1);
2966     } else {
2967         if (IS_LETTER(c) || (c == '_') || (c == ':'))
2968 	    return(1);
2969     }
2970     return(0);
2971 }
2972 
2973 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)2974 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2975     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2976         /*
2977 	 * Use the new checks of production [4] [4a] amd [5] of the
2978 	 * Update 5 of XML-1.0
2979 	 */
2980 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2981 	    (((c >= 'a') && (c <= 'z')) ||
2982 	     ((c >= 'A') && (c <= 'Z')) ||
2983 	     ((c >= '0') && (c <= '9')) || /* !start */
2984 	     (c == '_') || (c == ':') ||
2985 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2986 	     ((c >= 0xC0) && (c <= 0xD6)) ||
2987 	     ((c >= 0xD8) && (c <= 0xF6)) ||
2988 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
2989 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2990 	     ((c >= 0x370) && (c <= 0x37D)) ||
2991 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
2992 	     ((c >= 0x200C) && (c <= 0x200D)) ||
2993 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2994 	     ((c >= 0x2070) && (c <= 0x218F)) ||
2995 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2996 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
2997 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
2998 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2999 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3000 	     return(1);
3001     } else {
3002         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3003             (c == '.') || (c == '-') ||
3004 	    (c == '_') || (c == ':') ||
3005 	    (IS_COMBINING(c)) ||
3006 	    (IS_EXTENDER(c)))
3007 	    return(1);
3008     }
3009     return(0);
3010 }
3011 
3012 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3013                                           int *len, int *alloc, int normalize);
3014 
3015 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3016 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3017     int len = 0, l;
3018     int c;
3019     int count = 0;
3020 
3021     nbParseNameComplex++;
3022 
3023     /*
3024      * Handler for more complex cases
3025      */
3026     GROW;
3027     c = CUR_CHAR(l);
3028     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3029         /*
3030 	 * Use the new checks of production [4] [4a] amd [5] of the
3031 	 * Update 5 of XML-1.0
3032 	 */
3033 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3034 	    (!(((c >= 'a') && (c <= 'z')) ||
3035 	       ((c >= 'A') && (c <= 'Z')) ||
3036 	       (c == '_') || (c == ':') ||
3037 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3038 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3039 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3040 	       ((c >= 0x370) && (c <= 0x37D)) ||
3041 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3042 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3043 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3044 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3045 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3046 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3047 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3048 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3049 	    return(NULL);
3050 	}
3051 	len += l;
3052 	NEXTL(l);
3053 	c = CUR_CHAR(l);
3054 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3055 	       (((c >= 'a') && (c <= 'z')) ||
3056 	        ((c >= 'A') && (c <= 'Z')) ||
3057 	        ((c >= '0') && (c <= '9')) || /* !start */
3058 	        (c == '_') || (c == ':') ||
3059 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3060 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3061 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3062 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3063 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3064 	        ((c >= 0x370) && (c <= 0x37D)) ||
3065 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3066 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3067 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3068 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3069 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3070 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3071 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3072 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3073 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3074 		)) {
3075 	    if (count++ > 100) {
3076 		count = 0;
3077 		GROW;
3078 	    }
3079 	    len += l;
3080 	    NEXTL(l);
3081 	    c = CUR_CHAR(l);
3082 	}
3083     } else {
3084 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3085 	    (!IS_LETTER(c) && (c != '_') &&
3086 	     (c != ':'))) {
3087 	    return(NULL);
3088 	}
3089 	len += l;
3090 	NEXTL(l);
3091 	c = CUR_CHAR(l);
3092 
3093 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3094 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3095 		(c == '.') || (c == '-') ||
3096 		(c == '_') || (c == ':') ||
3097 		(IS_COMBINING(c)) ||
3098 		(IS_EXTENDER(c)))) {
3099 	    if (count++ > 100) {
3100 		count = 0;
3101 		GROW;
3102 	    }
3103 	    len += l;
3104 	    NEXTL(l);
3105 	    c = CUR_CHAR(l);
3106 	}
3107     }
3108     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3109         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3110     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3111 }
3112 
3113 /**
3114  * xmlParseName:
3115  * @ctxt:  an XML parser context
3116  *
3117  * parse an XML name.
3118  *
3119  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3120  *                  CombiningChar | Extender
3121  *
3122  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3123  *
3124  * [6] Names ::= Name (#x20 Name)*
3125  *
3126  * Returns the Name parsed or NULL
3127  */
3128 
3129 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3130 xmlParseName(xmlParserCtxtPtr ctxt) {
3131     const xmlChar *in;
3132     const xmlChar *ret;
3133     int count = 0;
3134 
3135     GROW;
3136 
3137     nbParseName++;
3138 
3139     /*
3140      * Accelerator for simple ASCII names
3141      */
3142     in = ctxt->input->cur;
3143     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3144 	((*in >= 0x41) && (*in <= 0x5A)) ||
3145 	(*in == '_') || (*in == ':')) {
3146 	in++;
3147 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3148 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3149 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3150 	       (*in == '_') || (*in == '-') ||
3151 	       (*in == ':') || (*in == '.'))
3152 	    in++;
3153 	if ((*in > 0) && (*in < 0x80)) {
3154 	    count = in - ctxt->input->cur;
3155 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3156 	    ctxt->input->cur = in;
3157 	    ctxt->nbChars += count;
3158 	    ctxt->input->col += count;
3159 	    if (ret == NULL)
3160 	        xmlErrMemory(ctxt, NULL);
3161 	    return(ret);
3162 	}
3163     }
3164     /* accelerator for special cases */
3165     return(xmlParseNameComplex(ctxt));
3166 }
3167 
3168 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3169 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3170     int len = 0, l;
3171     int c;
3172     int count = 0;
3173 
3174     nbParseNCNameComplex++;
3175 
3176     /*
3177      * Handler for more complex cases
3178      */
3179     GROW;
3180     c = CUR_CHAR(l);
3181     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3182 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3183 	return(NULL);
3184     }
3185 
3186     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3187 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3188 	if (count++ > 100) {
3189 	    count = 0;
3190 	    GROW;
3191 	}
3192 	len += l;
3193 	NEXTL(l);
3194 	c = CUR_CHAR(l);
3195     }
3196     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3197 }
3198 
3199 /**
3200  * xmlParseNCName:
3201  * @ctxt:  an XML parser context
3202  * @len:  lenght of the string parsed
3203  *
3204  * parse an XML name.
3205  *
3206  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3207  *                      CombiningChar | Extender
3208  *
3209  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3210  *
3211  * Returns the Name parsed or NULL
3212  */
3213 
3214 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3215 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3216     const xmlChar *in;
3217     const xmlChar *ret;
3218     int count = 0;
3219 
3220     nbParseNCName++;
3221 
3222     /*
3223      * Accelerator for simple ASCII names
3224      */
3225     in = ctxt->input->cur;
3226     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3227 	((*in >= 0x41) && (*in <= 0x5A)) ||
3228 	(*in == '_')) {
3229 	in++;
3230 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3231 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3232 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3233 	       (*in == '_') || (*in == '-') ||
3234 	       (*in == '.'))
3235 	    in++;
3236 	if ((*in > 0) && (*in < 0x80)) {
3237 	    count = in - ctxt->input->cur;
3238 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3239 	    ctxt->input->cur = in;
3240 	    ctxt->nbChars += count;
3241 	    ctxt->input->col += count;
3242 	    if (ret == NULL) {
3243 	        xmlErrMemory(ctxt, NULL);
3244 	    }
3245 	    return(ret);
3246 	}
3247     }
3248     return(xmlParseNCNameComplex(ctxt));
3249 }
3250 
3251 /**
3252  * xmlParseNameAndCompare:
3253  * @ctxt:  an XML parser context
3254  *
3255  * parse an XML name and compares for match
3256  * (specialized for endtag parsing)
3257  *
3258  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3259  * and the name for mismatch
3260  */
3261 
3262 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3263 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3264     register const xmlChar *cmp = other;
3265     register const xmlChar *in;
3266     const xmlChar *ret;
3267 
3268     GROW;
3269 
3270     in = ctxt->input->cur;
3271     while (*in != 0 && *in == *cmp) {
3272 	++in;
3273 	++cmp;
3274 	ctxt->input->col++;
3275     }
3276     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3277 	/* success */
3278 	ctxt->input->cur = in;
3279 	return (const xmlChar*) 1;
3280     }
3281     /* failure (or end of input buffer), check with full function */
3282     ret = xmlParseName (ctxt);
3283     /* strings coming from the dictionnary direct compare possible */
3284     if (ret == other) {
3285 	return (const xmlChar*) 1;
3286     }
3287     return ret;
3288 }
3289 
3290 /**
3291  * xmlParseStringName:
3292  * @ctxt:  an XML parser context
3293  * @str:  a pointer to the string pointer (IN/OUT)
3294  *
3295  * parse an XML name.
3296  *
3297  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3298  *                  CombiningChar | Extender
3299  *
3300  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3301  *
3302  * [6] Names ::= Name (#x20 Name)*
3303  *
3304  * Returns the Name parsed or NULL. The @str pointer
3305  * is updated to the current location in the string.
3306  */
3307 
3308 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3309 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3310     xmlChar buf[XML_MAX_NAMELEN + 5];
3311     const xmlChar *cur = *str;
3312     int len = 0, l;
3313     int c;
3314 
3315     nbParseStringName++;
3316 
3317     c = CUR_SCHAR(cur, l);
3318     if (!xmlIsNameStartChar(ctxt, c)) {
3319 	return(NULL);
3320     }
3321 
3322     COPY_BUF(l,buf,len,c);
3323     cur += l;
3324     c = CUR_SCHAR(cur, l);
3325     while (xmlIsNameChar(ctxt, c)) {
3326 	COPY_BUF(l,buf,len,c);
3327 	cur += l;
3328 	c = CUR_SCHAR(cur, l);
3329 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3330 	    /*
3331 	     * Okay someone managed to make a huge name, so he's ready to pay
3332 	     * for the processing speed.
3333 	     */
3334 	    xmlChar *buffer;
3335 	    int max = len * 2;
3336 
3337 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3338 	    if (buffer == NULL) {
3339 	        xmlErrMemory(ctxt, NULL);
3340 		return(NULL);
3341 	    }
3342 	    memcpy(buffer, buf, len);
3343 	    while (xmlIsNameChar(ctxt, c)) {
3344 		if (len + 10 > max) {
3345 		    xmlChar *tmp;
3346 		    max *= 2;
3347 		    tmp = (xmlChar *) xmlRealloc(buffer,
3348 			                            max * sizeof(xmlChar));
3349 		    if (tmp == NULL) {
3350 			xmlErrMemory(ctxt, NULL);
3351 			xmlFree(buffer);
3352 			return(NULL);
3353 		    }
3354 		    buffer = tmp;
3355 		}
3356 		COPY_BUF(l,buffer,len,c);
3357 		cur += l;
3358 		c = CUR_SCHAR(cur, l);
3359 	    }
3360 	    buffer[len] = 0;
3361 	    *str = cur;
3362 	    return(buffer);
3363 	}
3364     }
3365     *str = cur;
3366     return(xmlStrndup(buf, len));
3367 }
3368 
3369 /**
3370  * xmlParseNmtoken:
3371  * @ctxt:  an XML parser context
3372  *
3373  * parse an XML Nmtoken.
3374  *
3375  * [7] Nmtoken ::= (NameChar)+
3376  *
3377  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378  *
3379  * Returns the Nmtoken parsed or NULL
3380  */
3381 
3382 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3383 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3384     xmlChar buf[XML_MAX_NAMELEN + 5];
3385     int len = 0, l;
3386     int c;
3387     int count = 0;
3388 
3389     nbParseNmToken++;
3390 
3391     GROW;
3392     c = CUR_CHAR(l);
3393 
3394     while (xmlIsNameChar(ctxt, c)) {
3395 	if (count++ > 100) {
3396 	    count = 0;
3397 	    GROW;
3398 	}
3399 	COPY_BUF(l,buf,len,c);
3400 	NEXTL(l);
3401 	c = CUR_CHAR(l);
3402 	if (len >= XML_MAX_NAMELEN) {
3403 	    /*
3404 	     * Okay someone managed to make a huge token, so he's ready to pay
3405 	     * for the processing speed.
3406 	     */
3407 	    xmlChar *buffer;
3408 	    int max = len * 2;
3409 
3410 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3411 	    if (buffer == NULL) {
3412 	        xmlErrMemory(ctxt, NULL);
3413 		return(NULL);
3414 	    }
3415 	    memcpy(buffer, buf, len);
3416 	    while (xmlIsNameChar(ctxt, c)) {
3417 		if (count++ > 100) {
3418 		    count = 0;
3419 		    GROW;
3420 		}
3421 		if (len + 10 > max) {
3422 		    xmlChar *tmp;
3423 
3424 		    max *= 2;
3425 		    tmp = (xmlChar *) xmlRealloc(buffer,
3426 			                            max * sizeof(xmlChar));
3427 		    if (tmp == NULL) {
3428 			xmlErrMemory(ctxt, NULL);
3429 			xmlFree(buffer);
3430 			return(NULL);
3431 		    }
3432 		    buffer = tmp;
3433 		}
3434 		COPY_BUF(l,buffer,len,c);
3435 		NEXTL(l);
3436 		c = CUR_CHAR(l);
3437 	    }
3438 	    buffer[len] = 0;
3439 	    return(buffer);
3440 	}
3441     }
3442     if (len == 0)
3443         return(NULL);
3444     return(xmlStrndup(buf, len));
3445 }
3446 
3447 /**
3448  * xmlParseEntityValue:
3449  * @ctxt:  an XML parser context
3450  * @orig:  if non-NULL store a copy of the original entity value
3451  *
3452  * parse a value for ENTITY declarations
3453  *
3454  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3455  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3456  *
3457  * Returns the EntityValue parsed with reference substituted or NULL
3458  */
3459 
3460 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3461 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3462     xmlChar *buf = NULL;
3463     int len = 0;
3464     int size = XML_PARSER_BUFFER_SIZE;
3465     int c, l;
3466     xmlChar stop;
3467     xmlChar *ret = NULL;
3468     const xmlChar *cur = NULL;
3469     xmlParserInputPtr input;
3470 
3471     if (RAW == '"') stop = '"';
3472     else if (RAW == '\'') stop = '\'';
3473     else {
3474 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3475 	return(NULL);
3476     }
3477     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3478     if (buf == NULL) {
3479 	xmlErrMemory(ctxt, NULL);
3480 	return(NULL);
3481     }
3482 
3483     /*
3484      * The content of the entity definition is copied in a buffer.
3485      */
3486 
3487     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3488     input = ctxt->input;
3489     GROW;
3490     NEXT;
3491     c = CUR_CHAR(l);
3492     /*
3493      * NOTE: 4.4.5 Included in Literal
3494      * When a parameter entity reference appears in a literal entity
3495      * value, ... a single or double quote character in the replacement
3496      * text is always treated as a normal data character and will not
3497      * terminate the literal.
3498      * In practice it means we stop the loop only when back at parsing
3499      * the initial entity and the quote is found
3500      */
3501     while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3502 	   (ctxt->input != input))) {
3503 	if (len + 5 >= size) {
3504 	    xmlChar *tmp;
3505 
3506 	    size *= 2;
3507 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3508 	    if (tmp == NULL) {
3509 		xmlErrMemory(ctxt, NULL);
3510 		xmlFree(buf);
3511 		return(NULL);
3512 	    }
3513 	    buf = tmp;
3514 	}
3515 	COPY_BUF(l,buf,len,c);
3516 	NEXTL(l);
3517 	/*
3518 	 * Pop-up of finished entities.
3519 	 */
3520 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3521 	    xmlPopInput(ctxt);
3522 
3523 	GROW;
3524 	c = CUR_CHAR(l);
3525 	if (c == 0) {
3526 	    GROW;
3527 	    c = CUR_CHAR(l);
3528 	}
3529     }
3530     buf[len] = 0;
3531 
3532     /*
3533      * Raise problem w.r.t. '&' and '%' being used in non-entities
3534      * reference constructs. Note Charref will be handled in
3535      * xmlStringDecodeEntities()
3536      */
3537     cur = buf;
3538     while (*cur != 0) { /* non input consuming */
3539 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3540 	    xmlChar *name;
3541 	    xmlChar tmp = *cur;
3542 
3543 	    cur++;
3544 	    name = xmlParseStringName(ctxt, &cur);
3545             if ((name == NULL) || (*cur != ';')) {
3546 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3547 	    "EntityValue: '%c' forbidden except for entities references\n",
3548 	                          tmp);
3549 	    }
3550 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3551 		(ctxt->inputNr == 1)) {
3552 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3553 	    }
3554 	    if (name != NULL)
3555 		xmlFree(name);
3556 	    if (*cur == 0)
3557 	        break;
3558 	}
3559 	cur++;
3560     }
3561 
3562     /*
3563      * Then PEReference entities are substituted.
3564      */
3565     if (c != stop) {
3566 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3567 	xmlFree(buf);
3568     } else {
3569 	NEXT;
3570 	/*
3571 	 * NOTE: 4.4.7 Bypassed
3572 	 * When a general entity reference appears in the EntityValue in
3573 	 * an entity declaration, it is bypassed and left as is.
3574 	 * so XML_SUBSTITUTE_REF is not set here.
3575 	 */
3576 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3577 				      0, 0, 0);
3578 	if (orig != NULL)
3579 	    *orig = buf;
3580 	else
3581 	    xmlFree(buf);
3582     }
3583 
3584     return(ret);
3585 }
3586 
3587 /**
3588  * xmlParseAttValueComplex:
3589  * @ctxt:  an XML parser context
3590  * @len:   the resulting attribute len
3591  * @normalize:  wether to apply the inner normalization
3592  *
3593  * parse a value for an attribute, this is the fallback function
3594  * of xmlParseAttValue() when the attribute parsing requires handling
3595  * of non-ASCII characters, or normalization compaction.
3596  *
3597  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3598  */
3599 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3600 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3601     xmlChar limit = 0;
3602     xmlChar *buf = NULL;
3603     xmlChar *rep = NULL;
3604     int len = 0;
3605     int buf_size = 0;
3606     int c, l, in_space = 0;
3607     xmlChar *current = NULL;
3608     xmlEntityPtr ent;
3609 
3610     if (NXT(0) == '"') {
3611 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3612 	limit = '"';
3613         NEXT;
3614     } else if (NXT(0) == '\'') {
3615 	limit = '\'';
3616 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3617         NEXT;
3618     } else {
3619 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3620 	return(NULL);
3621     }
3622 
3623     /*
3624      * allocate a translation buffer.
3625      */
3626     buf_size = XML_PARSER_BUFFER_SIZE;
3627     buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3628     if (buf == NULL) goto mem_error;
3629 
3630     /*
3631      * OK loop until we reach one of the ending char or a size limit.
3632      */
3633     c = CUR_CHAR(l);
3634     while ((NXT(0) != limit) && /* checked */
3635            (IS_CHAR(c)) && (c != '<')) {
3636 	if (c == 0) break;
3637 	if (c == '&') {
3638 	    in_space = 0;
3639 	    if (NXT(1) == '#') {
3640 		int val = xmlParseCharRef(ctxt);
3641 
3642 		if (val == '&') {
3643 		    if (ctxt->replaceEntities) {
3644 			if (len > buf_size - 10) {
3645 			    growBuffer(buf, 10);
3646 			}
3647 			buf[len++] = '&';
3648 		    } else {
3649 			/*
3650 			 * The reparsing will be done in xmlStringGetNodeList()
3651 			 * called by the attribute() function in SAX.c
3652 			 */
3653 			if (len > buf_size - 10) {
3654 			    growBuffer(buf, 10);
3655 			}
3656 			buf[len++] = '&';
3657 			buf[len++] = '#';
3658 			buf[len++] = '3';
3659 			buf[len++] = '8';
3660 			buf[len++] = ';';
3661 		    }
3662 		} else if (val != 0) {
3663 		    if (len > buf_size - 10) {
3664 			growBuffer(buf, 10);
3665 		    }
3666 		    len += xmlCopyChar(0, &buf[len], val);
3667 		}
3668 	    } else {
3669 		ent = xmlParseEntityRef(ctxt);
3670 		ctxt->nbentities++;
3671 		if (ent != NULL)
3672 		    ctxt->nbentities += ent->owner;
3673 		if ((ent != NULL) &&
3674 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3675 		    if (len > buf_size - 10) {
3676 			growBuffer(buf, 10);
3677 		    }
3678 		    if ((ctxt->replaceEntities == 0) &&
3679 		        (ent->content[0] == '&')) {
3680 			buf[len++] = '&';
3681 			buf[len++] = '#';
3682 			buf[len++] = '3';
3683 			buf[len++] = '8';
3684 			buf[len++] = ';';
3685 		    } else {
3686 			buf[len++] = ent->content[0];
3687 		    }
3688 		} else if ((ent != NULL) &&
3689 		           (ctxt->replaceEntities != 0)) {
3690 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3691 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3692 						      XML_SUBSTITUTE_REF,
3693 						      0, 0, 0);
3694 			if (rep != NULL) {
3695 			    current = rep;
3696 			    while (*current != 0) { /* non input consuming */
3697 				buf[len++] = *current++;
3698 				if (len > buf_size - 10) {
3699 				    growBuffer(buf, 10);
3700 				}
3701 			    }
3702 			    xmlFree(rep);
3703 			    rep = NULL;
3704 			}
3705 		    } else {
3706 			if (len > buf_size - 10) {
3707 			    growBuffer(buf, 10);
3708 			}
3709 			if (ent->content != NULL)
3710 			    buf[len++] = ent->content[0];
3711 		    }
3712 		} else if (ent != NULL) {
3713 		    int i = xmlStrlen(ent->name);
3714 		    const xmlChar *cur = ent->name;
3715 
3716 		    /*
3717 		     * This may look absurd but is needed to detect
3718 		     * entities problems
3719 		     */
3720 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3721 			(ent->content != NULL)) {
3722 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3723 						  XML_SUBSTITUTE_REF, 0, 0, 0);
3724 			if (rep != NULL) {
3725 			    xmlFree(rep);
3726 			    rep = NULL;
3727 			}
3728 		    }
3729 
3730 		    /*
3731 		     * Just output the reference
3732 		     */
3733 		    buf[len++] = '&';
3734 		    while (len > buf_size - i - 10) {
3735 			growBuffer(buf, i + 10);
3736 		    }
3737 		    for (;i > 0;i--)
3738 			buf[len++] = *cur++;
3739 		    buf[len++] = ';';
3740 		}
3741 	    }
3742 	} else {
3743 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3744 	        if ((len != 0) || (!normalize)) {
3745 		    if ((!normalize) || (!in_space)) {
3746 			COPY_BUF(l,buf,len,0x20);
3747 			while (len > buf_size - 10) {
3748 			    growBuffer(buf, 10);
3749 			}
3750 		    }
3751 		    in_space = 1;
3752 		}
3753 	    } else {
3754 	        in_space = 0;
3755 		COPY_BUF(l,buf,len,c);
3756 		if (len > buf_size - 10) {
3757 		    growBuffer(buf, 10);
3758 		}
3759 	    }
3760 	    NEXTL(l);
3761 	}
3762 	GROW;
3763 	c = CUR_CHAR(l);
3764     }
3765     if ((in_space) && (normalize)) {
3766         while (buf[len - 1] == 0x20) len--;
3767     }
3768     buf[len] = 0;
3769     if (RAW == '<') {
3770 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3771     } else if (RAW != limit) {
3772 	if ((c != 0) && (!IS_CHAR(c))) {
3773 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3774 			   "invalid character in attribute value\n");
3775 	} else {
3776 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3777 			   "AttValue: ' expected\n");
3778         }
3779     } else
3780 	NEXT;
3781     if (attlen != NULL) *attlen = len;
3782     return(buf);
3783 
3784 mem_error:
3785     xmlErrMemory(ctxt, NULL);
3786     if (buf != NULL)
3787         xmlFree(buf);
3788     if (rep != NULL)
3789         xmlFree(rep);
3790     return(NULL);
3791 }
3792 
3793 /**
3794  * xmlParseAttValue:
3795  * @ctxt:  an XML parser context
3796  *
3797  * parse a value for an attribute
3798  * Note: the parser won't do substitution of entities here, this
3799  * will be handled later in xmlStringGetNodeList
3800  *
3801  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3802  *                   "'" ([^<&'] | Reference)* "'"
3803  *
3804  * 3.3.3 Attribute-Value Normalization:
3805  * Before the value of an attribute is passed to the application or
3806  * checked for validity, the XML processor must normalize it as follows:
3807  * - a character reference is processed by appending the referenced
3808  *   character to the attribute value
3809  * - an entity reference is processed by recursively processing the
3810  *   replacement text of the entity
3811  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3812  *   appending #x20 to the normalized value, except that only a single
3813  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3814  *   parsed entity or the literal entity value of an internal parsed entity
3815  * - other characters are processed by appending them to the normalized value
3816  * If the declared value is not CDATA, then the XML processor must further
3817  * process the normalized attribute value by discarding any leading and
3818  * trailing space (#x20) characters, and by replacing sequences of space
3819  * (#x20) characters by a single space (#x20) character.
3820  * All attributes for which no declaration has been read should be treated
3821  * by a non-validating parser as if declared CDATA.
3822  *
3823  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3824  */
3825 
3826 
3827 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3828 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3829     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3830     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3831 }
3832 
3833 /**
3834  * xmlParseSystemLiteral:
3835  * @ctxt:  an XML parser context
3836  *
3837  * parse an XML Literal
3838  *
3839  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3840  *
3841  * Returns the SystemLiteral parsed or NULL
3842  */
3843 
3844 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)3845 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3846     xmlChar *buf = NULL;
3847     int len = 0;
3848     int size = XML_PARSER_BUFFER_SIZE;
3849     int cur, l;
3850     xmlChar stop;
3851     int state = ctxt->instate;
3852     int count = 0;
3853 
3854     SHRINK;
3855     if (RAW == '"') {
3856         NEXT;
3857 	stop = '"';
3858     } else if (RAW == '\'') {
3859         NEXT;
3860 	stop = '\'';
3861     } else {
3862 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3863 	return(NULL);
3864     }
3865 
3866     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3867     if (buf == NULL) {
3868         xmlErrMemory(ctxt, NULL);
3869 	return(NULL);
3870     }
3871     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3872     cur = CUR_CHAR(l);
3873     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3874 	if (len + 5 >= size) {
3875 	    xmlChar *tmp;
3876 
3877 	    size *= 2;
3878 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3879 	    if (tmp == NULL) {
3880 	        xmlFree(buf);
3881 		xmlErrMemory(ctxt, NULL);
3882 		ctxt->instate = (xmlParserInputState) state;
3883 		return(NULL);
3884 	    }
3885 	    buf = tmp;
3886 	}
3887 	count++;
3888 	if (count > 50) {
3889 	    GROW;
3890 	    count = 0;
3891 	}
3892 	COPY_BUF(l,buf,len,cur);
3893 	NEXTL(l);
3894 	cur = CUR_CHAR(l);
3895 	if (cur == 0) {
3896 	    GROW;
3897 	    SHRINK;
3898 	    cur = CUR_CHAR(l);
3899 	}
3900     }
3901     buf[len] = 0;
3902     ctxt->instate = (xmlParserInputState) state;
3903     if (!IS_CHAR(cur)) {
3904 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3905     } else {
3906 	NEXT;
3907     }
3908     return(buf);
3909 }
3910 
3911 /**
3912  * xmlParsePubidLiteral:
3913  * @ctxt:  an XML parser context
3914  *
3915  * parse an XML public literal
3916  *
3917  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3918  *
3919  * Returns the PubidLiteral parsed or NULL.
3920  */
3921 
3922 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)3923 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3924     xmlChar *buf = NULL;
3925     int len = 0;
3926     int size = XML_PARSER_BUFFER_SIZE;
3927     xmlChar cur;
3928     xmlChar stop;
3929     int count = 0;
3930     xmlParserInputState oldstate = ctxt->instate;
3931 
3932     SHRINK;
3933     if (RAW == '"') {
3934         NEXT;
3935 	stop = '"';
3936     } else if (RAW == '\'') {
3937         NEXT;
3938 	stop = '\'';
3939     } else {
3940 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3941 	return(NULL);
3942     }
3943     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3944     if (buf == NULL) {
3945 	xmlErrMemory(ctxt, NULL);
3946 	return(NULL);
3947     }
3948     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3949     cur = CUR;
3950     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3951 	if (len + 1 >= size) {
3952 	    xmlChar *tmp;
3953 
3954 	    size *= 2;
3955 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3956 	    if (tmp == NULL) {
3957 		xmlErrMemory(ctxt, NULL);
3958 		xmlFree(buf);
3959 		return(NULL);
3960 	    }
3961 	    buf = tmp;
3962 	}
3963 	buf[len++] = cur;
3964 	count++;
3965 	if (count > 50) {
3966 	    GROW;
3967 	    count = 0;
3968 	}
3969 	NEXT;
3970 	cur = CUR;
3971 	if (cur == 0) {
3972 	    GROW;
3973 	    SHRINK;
3974 	    cur = CUR;
3975 	}
3976     }
3977     buf[len] = 0;
3978     if (cur != stop) {
3979 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3980     } else {
3981 	NEXT;
3982     }
3983     ctxt->instate = oldstate;
3984     return(buf);
3985 }
3986 
/*
 * Forward declaration: xmlParseCharData() below hands over to this
 * routine, which is only defined after it.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value. A non-zero entry means the inner
 * loop of xmlParseCharData() simply steps over that byte; a zero entry
 * stops the loop so the caller can look at the byte. Zero entries are:
 * every byte below 0x20 except 0x9 (so 0xA and 0xD stop the loop),
 * '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4026 
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * runs of plain ASCII text directly to the SAX callbacks, without copying.
 * As soon as a byte outside 0x20..0x7F / 0x09 is met (or when @cdata is
 * non-zero) the work is handed over to xmlParseCharDataComplex(), after
 * resetting the input line/column to the values recorded at entry or after
 * the last chunk reported from the main scan.
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position to restore before falling back on the complex routine */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
	in = ctxt->input->cur;
	do {
get_more_space:
	    /* skip a leading run of spaces and line feeds */
	    while (*in == 0x20) { in++; ctxt->input->col++; }
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more_space;
	    }
	    /*
	     * Only spaces/line feeds were seen before the next markup:
	     * report them (if any) and stop.
	     */
	    if (*in == '<') {
		nbchar = in - ctxt->input->cur;
		if (nbchar > 0) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    /*
		     * When the two callbacks differ, areBlanks() decides
		     * which one receives the text.
		     */
		    if ((ctxt->sax != NULL) &&
		        (ctxt->sax->ignorableWhitespace !=
		         ctxt->sax->characters)) {
			if (areBlanks(ctxt, tmp, nbchar, 1)) {
			    if (ctxt->sax->ignorableWhitespace != NULL)
				ctxt->sax->ignorableWhitespace(ctxt->userData,
						       tmp, nbchar);
			} else {
			    if (ctxt->sax->characters != NULL)
				ctxt->sax->characters(ctxt->userData,
						      tmp, nbchar);
			    if (*ctxt->space == -1)
			        *ctxt->space = -2;
			}
		    } else if ((ctxt->sax != NULL) &&
		               (ctxt->sax->characters != NULL)) {
			ctxt->sax->characters(ctxt->userData,
					      tmp, nbchar);
		    }
		}
		return;
	    }

get_more:
	    /* step over every byte flagged as plain text in test_char_data */
            ccol = ctxt->input->col;
	    while (test_char_data[*in]) {
		in++;
		ccol++;
	    }
	    ctxt->input->col = ccol;
	    if (*in == 0xA) {
		do {
		    ctxt->input->line++; ctxt->input->col = 1;
		    in++;
		} while (*in == 0xA);
		goto get_more;
	    }
	    if (*in == ']') {
		/* "]]>" is not allowed in content outside a CDATA section */
		if ((in[1] == ']') && (in[2] == '>')) {
		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
		    ctxt->input->cur = in;
		    return;
		}
		/* a lone ']' is ordinary text */
		in++;
		ctxt->input->col++;
		goto get_more;
	    }
	    /* report the run scanned so far, straight from the input buffer */
	    nbchar = in - ctxt->input->cur;
	    if (nbchar > 0) {
		if ((ctxt->sax != NULL) &&
		    (ctxt->sax->ignorableWhitespace !=
		     ctxt->sax->characters) &&
		    (IS_BLANK_CH(*ctxt->input->cur))) {
		    const xmlChar *tmp = ctxt->input->cur;
		    ctxt->input->cur = in;

		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
		        if (ctxt->sax->ignorableWhitespace != NULL)
			    ctxt->sax->ignorableWhitespace(ctxt->userData,
							   tmp, nbchar);
		    } else {
		        if (ctxt->sax->characters != NULL)
			    ctxt->sax->characters(ctxt->userData,
						  tmp, nbchar);
			if (*ctxt->space == -1)
			    *ctxt->space = -2;
		    }
		    /* remember the position reached by the reported text */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		} else if (ctxt->sax != NULL) {
		    if (ctxt->sax->characters != NULL)
			ctxt->sax->characters(ctxt->userData,
					      ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
		}
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
	    }
	    ctxt->input->cur = in;
	    if (*in == 0xD) {
		in++;
		if (*in == 0xA) {
		    /*
		     * CR LF pair: the input cursor is moved onto the LF,
		     * leaving the CR behind, while the scan pointer moves
		     * past both bytes.
		     */
		    ctxt->input->cur = in;
		    in++;
		    ctxt->input->line++; ctxt->input->col = 1;
		    continue; /* while */
		}
		/* lone CR: back up, the checks below decide what to do */
		in--;
	    }
	    /* markup or a reference starts here: let the caller handle it */
	    if (*in == '<') {
		return;
	    }
	    if (*in == '&') {
		return;
	    }
	    SHRINK;
	    GROW;
	    /* SHRINK/GROW may have touched the buffer: reload the pointer */
	    in = ctxt->input->cur;
	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
	nbchar = 0;
    }
    /* fast path gave up (or was skipped): hand over to the generic code */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4182 
4183 /**
4184  * xmlParseCharDataComplex:
4185  * @ctxt:  an XML parser context
4186  * @cdata:  int indicating whether we are within a CDATA section
4187  *
4188  * parse a CharData section.this is the fallback function
4189  * of xmlParseCharData() when the parsing requires handling
4190  * of non-ASCII characters.
4191  */
4192 void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4193 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4194     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4195     int nbchar = 0;
4196     int cur, l;
4197     int count = 0;
4198 
4199     SHRINK;
4200     GROW;
4201     cur = CUR_CHAR(l);
4202     while ((cur != '<') && /* checked */
4203            (cur != '&') &&
4204 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4205 	if ((cur == ']') && (NXT(1) == ']') &&
4206 	    (NXT(2) == '>')) {
4207 	    if (cdata) break;
4208 	    else {
4209 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4210 	    }
4211 	}
4212 	COPY_BUF(l,buf,nbchar,cur);
4213 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4214 	    buf[nbchar] = 0;
4215 
4216 	    /*
4217 	     * OK the segment is to be consumed as chars.
4218 	     */
4219 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4220 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4221 		    if (ctxt->sax->ignorableWhitespace != NULL)
4222 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4223 			                               buf, nbchar);
4224 		} else {
4225 		    if (ctxt->sax->characters != NULL)
4226 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4227 		    if ((ctxt->sax->characters !=
4228 		         ctxt->sax->ignorableWhitespace) &&
4229 			(*ctxt->space == -1))
4230 			*ctxt->space = -2;
4231 		}
4232 	    }
4233 	    nbchar = 0;
4234             /* something really bad happened in the SAX callback */
4235             if (ctxt->instate != XML_PARSER_CONTENT)
4236                 return;
4237 	}
4238 	count++;
4239 	if (count > 50) {
4240 	    GROW;
4241 	    count = 0;
4242 	}
4243 	NEXTL(l);
4244 	cur = CUR_CHAR(l);
4245     }
4246     if (nbchar != 0) {
4247         buf[nbchar] = 0;
4248 	/*
4249 	 * OK the segment is to be consumed as chars.
4250 	 */
4251 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4252 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4253 		if (ctxt->sax->ignorableWhitespace != NULL)
4254 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4255 	    } else {
4256 		if (ctxt->sax->characters != NULL)
4257 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4258 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4259 		    (*ctxt->space == -1))
4260 		    *ctxt->space = -2;
4261 	    }
4262 	}
4263     }
4264     if ((cur != 0) && (!IS_CHAR(cur))) {
4265 	/* Generate the error and skip the offending character */
4266         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4267                           "PCDATA invalid Char value %d\n",
4268 	                  cur);
4269 	NEXTL(l);
4270     }
4271 }
4272 
4273 /**
4274  * xmlParseExternalID:
4275  * @ctxt:  an XML parser context
4276  * @publicID:  a xmlChar** receiving PubidLiteral
4277  * @strict: indicate whether we should restrict parsing to only
4278  *          production [75], see NOTE below
4279  *
4280  * Parse an External ID or a Public ID
4281  *
4282  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4283  *       'PUBLIC' S PubidLiteral S SystemLiteral
4284  *
4285  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4286  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4287  *
4288  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4289  *
4290  * Returns the function returns SystemLiteral and in the second
4291  *                case publicID receives PubidLiteral, is strict is off
4292  *                it is possible to return NULL and have publicID set.
4293  */
4294 
4295 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4296 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4297     xmlChar *URI = NULL;
4298 
4299     SHRINK;
4300 
4301     *publicID = NULL;
4302     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4303         SKIP(6);
4304 	if (!IS_BLANK_CH(CUR)) {
4305 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4306 	                   "Space required after 'SYSTEM'\n");
4307 	}
4308         SKIP_BLANKS;
4309 	URI = xmlParseSystemLiteral(ctxt);
4310 	if (URI == NULL) {
4311 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4312         }
4313     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4314         SKIP(6);
4315 	if (!IS_BLANK_CH(CUR)) {
4316 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 		    "Space required after 'PUBLIC'\n");
4318 	}
4319         SKIP_BLANKS;
4320 	*publicID = xmlParsePubidLiteral(ctxt);
4321 	if (*publicID == NULL) {
4322 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4323 	}
4324 	if (strict) {
4325 	    /*
4326 	     * We don't handle [83] so "S SystemLiteral" is required.
4327 	     */
4328 	    if (!IS_BLANK_CH(CUR)) {
4329 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4330 			"Space required after the Public Identifier\n");
4331 	    }
4332 	} else {
4333 	    /*
4334 	     * We handle [83] so we return immediately, if
4335 	     * "S SystemLiteral" is not detected. From a purely parsing
4336 	     * point of view that's a nice mess.
4337 	     */
4338 	    const xmlChar *ptr;
4339 	    GROW;
4340 
4341 	    ptr = CUR_PTR;
4342 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4343 
4344 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4345 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4346 	}
4347         SKIP_BLANKS;
4348 	URI = xmlParseSystemLiteral(ctxt);
4349 	if (URI == NULL) {
4350 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4351         }
4352     }
4353     return(URI);
4354 }
4355 
4356 /**
4357  * xmlParseCommentComplex:
4358  * @ctxt:  an XML parser context
4359  * @buf:  the already parsed part of the buffer
4360  * @len:  number of bytes filles in the buffer
4361  * @size:  allocated size of the buffer
4362  *
4363  * Skip an XML (SGML) comment <!-- .... -->
4364  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4365  *  must not occur within comments. "
4366  * This is the slow routine in case the accelerator for ascii didn't work
4367  *
4368  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4369  */
4370 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4371 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4372     int q, ql;
4373     int r, rl;
4374     int cur, l;
4375     int count = 0;
4376     int inputid;
4377 
4378     inputid = ctxt->input->id;
4379 
4380     if (buf == NULL) {
4381         len = 0;
4382 	size = XML_PARSER_BUFFER_SIZE;
4383 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4384 	if (buf == NULL) {
4385 	    xmlErrMemory(ctxt, NULL);
4386 	    return;
4387 	}
4388     }
4389     GROW;	/* Assure there's enough input data */
4390     q = CUR_CHAR(ql);
4391     if (q == 0)
4392         goto not_terminated;
4393     if (!IS_CHAR(q)) {
4394         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395                           "xmlParseComment: invalid xmlChar value %d\n",
4396 	                  q);
4397 	xmlFree (buf);
4398 	return;
4399     }
4400     NEXTL(ql);
4401     r = CUR_CHAR(rl);
4402     if (r == 0)
4403         goto not_terminated;
4404     if (!IS_CHAR(r)) {
4405         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406                           "xmlParseComment: invalid xmlChar value %d\n",
4407 	                  q);
4408 	xmlFree (buf);
4409 	return;
4410     }
4411     NEXTL(rl);
4412     cur = CUR_CHAR(l);
4413     if (cur == 0)
4414         goto not_terminated;
4415     while (IS_CHAR(cur) && /* checked */
4416            ((cur != '>') ||
4417 	    (r != '-') || (q != '-'))) {
4418 	if ((r == '-') && (q == '-')) {
4419 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4420 	}
4421 	if (len + 5 >= size) {
4422 	    xmlChar *new_buf;
4423 	    size *= 2;
4424 	    new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4425 	    if (new_buf == NULL) {
4426 		xmlFree (buf);
4427 		xmlErrMemory(ctxt, NULL);
4428 		return;
4429 	    }
4430 	    buf = new_buf;
4431 	}
4432 	COPY_BUF(ql,buf,len,q);
4433 	q = r;
4434 	ql = rl;
4435 	r = cur;
4436 	rl = l;
4437 
4438 	count++;
4439 	if (count > 50) {
4440 	    GROW;
4441 	    count = 0;
4442 	}
4443 	NEXTL(l);
4444 	cur = CUR_CHAR(l);
4445 	if (cur == 0) {
4446 	    SHRINK;
4447 	    GROW;
4448 	    cur = CUR_CHAR(l);
4449 	}
4450     }
4451     buf[len] = 0;
4452     if (cur == 0) {
4453 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4454 	                     "Comment not terminated \n<!--%.50s\n", buf);
4455     } else if (!IS_CHAR(cur)) {
4456         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4457                           "xmlParseComment: invalid xmlChar value %d\n",
4458 	                  cur);
4459     } else {
4460 	if (inputid != ctxt->input->id) {
4461 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4462 		"Comment doesn't start and stop in the same entity\n");
4463 	}
4464         NEXT;
4465 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4466 	    (!ctxt->disableSAX))
4467 	    ctxt->sax->comment(ctxt->userData, buf);
4468     }
4469     xmlFree(buf);
4470     return;
4471 not_terminated:
4472     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4473 			 "Comment not terminated\n", NULL);
4474     xmlFree(buf);
4475     return;
4476 }
4477 
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * Nothing is done unless the input currently starts with "<!--".
 * A fast path scans plain ASCII content directly in the input buffer and
 * copies it to a local buffer only when a SAX comment callback is set.
 * When a byte outside 0x20..0x7F / 0x09 is met, the work (and the buffer
 * built so far) is handed over to xmlParseCommentComplex().
 * ctxt->instate is XML_PARSER_COMMENT while parsing and is put back to
 * its previous value on every return path taken after that point.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int size = XML_PARSER_BUFFER_SIZE;
    int len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    int nbchar = 0, ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    /* remember the entity we started in, to detect boundary crossing */
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	}
get_more:
	/* step over ASCII bytes (0x20..0x7F and 0x09) other than '-' */
        ccol = ctxt->input->col;
	while (((*in > '-') && (*in <= 0x7F)) ||
	       ((*in >= 0x20) && (*in < '-')) ||
	       (*in == 0x09)) {
		    in++;
		    ccol++;
	}
	ctxt->input->col = ccol;
	if (*in == 0xA) {
	    do {
		ctxt->input->line++; ctxt->input->col = 1;
		in++;
	    } while (*in == 0xA);
	    goto get_more;
	}
	nbchar = in - ctxt->input->cur;
	/*
	 * save current set of data
	 */
	if (nbchar > 0) {
	    /* the text is only collected if someone will receive it */
	    if ((ctxt->sax != NULL) &&
		(ctxt->sax->comment != NULL)) {
		if (buf == NULL) {
		    /*
		     * First chunk: an exact fit when "--" follows right
		     * away, otherwise leave extra room for more text.
		     */
		    if ((*in == '-') && (in[1] == '-'))
		        size = nbchar + 1;
		    else
		        size = XML_PARSER_BUFFER_SIZE + nbchar;
		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
		    if (buf == NULL) {
		        xmlErrMemory(ctxt, NULL);
			ctxt->instate = state;
			return;
		    }
		    len = 0;
		} else if (len + nbchar + 1 >= size) {
		    xmlChar *new_buf;
		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
		    new_buf = (xmlChar *) xmlRealloc(buf,
		                                     size * sizeof(xmlChar));
		    if (new_buf == NULL) {
		        xmlFree (buf);
			xmlErrMemory(ctxt, NULL);
			ctxt->instate = state;
			return;
		    }
		    buf = new_buf;
		}
		memcpy(&buf[len], ctxt->input->cur, nbchar);
		len += nbchar;
		buf[len] = 0;
	    }
	}
	ctxt->input->cur = in;
	if (*in == 0xA) {
	    in++;
	    ctxt->input->line++; ctxt->input->col = 1;
	}
	if (*in == 0xD) {
	    in++;
	    if (*in == 0xA) {
		/*
		 * CR LF pair: the input cursor is moved onto the LF,
		 * leaving the CR behind, while the scan pointer moves
		 * past both bytes.
		 */
		ctxt->input->cur = in;
		in++;
		ctxt->input->line++; ctxt->input->col = 1;
		continue; /* while */
	    }
	    in--;
	}
	SHRINK;
	GROW;
	/* SHRINK/GROW may have touched the buffer: reload the pointer */
	in = ctxt->input->cur;
	if (*in == '-') {
	    if (in[1] == '-') {
	        if (in[2] == '>') {
		    /* "-->" : end of the comment */
		    if (ctxt->input->id != inputid) {
			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
			"comment doesn't start and stop in the same entity\n");
		    }
		    SKIP(3);
		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
		        (!ctxt->disableSAX)) {
			/* nothing was buffered: report an empty comment */
			if (buf != NULL)
			    ctxt->sax->comment(ctxt->userData, buf);
			else
			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
		    }
		    if (buf != NULL)
		        xmlFree(buf);
		    ctxt->instate = state;
		    return;
		}
		/* "--" not followed by '>': flag it and keep scanning */
		if (buf != NULL)
		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
		                      "Comment not terminated \n<!--%.50s\n",
				      buf);
		else
		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
		                      "Comment not terminated \n", NULL);
		in++;
		ctxt->input->col++;
	    }
	    /* step over the (remaining) hyphen and resume the fast scan */
	    in++;
	    ctxt->input->col++;
	    goto get_more;
	}
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* not plain ASCII: let the generic routine finish the job */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4632 
4633 
4634 /**
4635  * xmlParsePITarget:
4636  * @ctxt:  an XML parser context
4637  *
4638  * parse the name of a PI
4639  *
4640  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4641  *
4642  * Returns the PITarget name or NULL
4643  */
4644 
4645 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4646 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4647     const xmlChar *name;
4648 
4649     name = xmlParseName(ctxt);
4650     if ((name != NULL) &&
4651         ((name[0] == 'x') || (name[0] == 'X')) &&
4652         ((name[1] == 'm') || (name[1] == 'M')) &&
4653         ((name[2] == 'l') || (name[2] == 'L'))) {
4654 	int i;
4655 	if ((name[0] == 'x') && (name[1] == 'm') &&
4656 	    (name[2] == 'l') && (name[3] == 0)) {
4657 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4658 		 "XML declaration allowed only at the start of the document\n");
4659 	    return(name);
4660 	} else if (name[3] == 0) {
4661 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4662 	    return(name);
4663 	}
4664 	for (i = 0;;i++) {
4665 	    if (xmlW3CPIs[i] == NULL) break;
4666 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4667 	        return(name);
4668 	}
4669 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4670 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
4671 		      NULL, NULL);
4672     }
4673     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4674 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
4675 		 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4676     }
4677     return(name);
4678 }
4679 
4680 #ifdef LIBXML_CATALOG_ENABLED
4681 /**
4682  * xmlParseCatalogPI:
4683  * @ctxt:  an XML parser context
4684  * @catalog:  the PI value string
4685  *
4686  * parse an XML Catalog Processing Instruction.
4687  *
4688  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4689  *
4690  * Occurs only if allowed by the user and if happening in the Misc
4691  * part of the document before any doctype informations
4692  * This will add the given catalog to the parsing context in order
4693  * to be used if there is a resolution need further down in the document
4694  */
4695 
4696 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)4697 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4698     xmlChar *URL = NULL;
4699     const xmlChar *tmp, *base;
4700     xmlChar marker;
4701 
4702     tmp = catalog;
4703     while (IS_BLANK_CH(*tmp)) tmp++;
4704     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4705 	goto error;
4706     tmp += 7;
4707     while (IS_BLANK_CH(*tmp)) tmp++;
4708     if (*tmp != '=') {
4709 	return;
4710     }
4711     tmp++;
4712     while (IS_BLANK_CH(*tmp)) tmp++;
4713     marker = *tmp;
4714     if ((marker != '\'') && (marker != '"'))
4715 	goto error;
4716     tmp++;
4717     base = tmp;
4718     while ((*tmp != 0) && (*tmp != marker)) tmp++;
4719     if (*tmp == 0)
4720 	goto error;
4721     URL = xmlStrndup(base, tmp - base);
4722     tmp++;
4723     while (IS_BLANK_CH(*tmp)) tmp++;
4724     if (*tmp != 0)
4725 	goto error;
4726 
4727     if (URL != NULL) {
4728 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4729 	xmlFree(URL);
4730     }
4731     return;
4732 
4733 error:
4734     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4735 	          "Catalog PI syntax error: %s\n",
4736 		  catalog, NULL);
4737     if (URL != NULL)
4738 	xmlFree(URL);
4739 }
4740 #endif
4741 
4742 /**
4743  * xmlParsePI:
4744  * @ctxt:  an XML parser context
4745  *
4746  * parse an XML Processing Instruction.
4747  *
4748  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4749  *
4750  * The processing is transfered to SAX once parsed.
4751  */
4752 
4753 void
xmlParsePI(xmlParserCtxtPtr ctxt)4754 xmlParsePI(xmlParserCtxtPtr ctxt) {
4755     xmlChar *buf = NULL;
4756     int len = 0;
4757     int size = XML_PARSER_BUFFER_SIZE;
4758     int cur, l;
4759     const xmlChar *target;
4760     xmlParserInputState state;
4761     int count = 0;
4762 
4763     if ((RAW == '<') && (NXT(1) == '?')) {
4764 	xmlParserInputPtr input = ctxt->input;
4765 	state = ctxt->instate;
4766         ctxt->instate = XML_PARSER_PI;
4767 	/*
4768 	 * this is a Processing Instruction.
4769 	 */
4770 	SKIP(2);
4771 	SHRINK;
4772 
4773 	/*
4774 	 * Parse the target name and check for special support like
4775 	 * namespace.
4776 	 */
4777         target = xmlParsePITarget(ctxt);
4778 	if (target != NULL) {
4779 	    if ((RAW == '?') && (NXT(1) == '>')) {
4780 		if (input != ctxt->input) {
4781 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4782 	    "PI declaration doesn't start and stop in the same entity\n");
4783 		}
4784 		SKIP(2);
4785 
4786 		/*
4787 		 * SAX: PI detected.
4788 		 */
4789 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4790 		    (ctxt->sax->processingInstruction != NULL))
4791 		    ctxt->sax->processingInstruction(ctxt->userData,
4792 		                                     target, NULL);
4793 		ctxt->instate = state;
4794 		return;
4795 	    }
4796 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4797 	    if (buf == NULL) {
4798 		xmlErrMemory(ctxt, NULL);
4799 		ctxt->instate = state;
4800 		return;
4801 	    }
4802 	    cur = CUR;
4803 	    if (!IS_BLANK(cur)) {
4804 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4805 			  "ParsePI: PI %s space expected\n", target);
4806 	    }
4807             SKIP_BLANKS;
4808 	    cur = CUR_CHAR(l);
4809 	    while (IS_CHAR(cur) && /* checked */
4810 		   ((cur != '?') || (NXT(1) != '>'))) {
4811 		if (len + 5 >= size) {
4812 		    xmlChar *tmp;
4813 
4814 		    size *= 2;
4815 		    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4816 		    if (tmp == NULL) {
4817 			xmlErrMemory(ctxt, NULL);
4818 			xmlFree(buf);
4819 			ctxt->instate = state;
4820 			return;
4821 		    }
4822 		    buf = tmp;
4823 		}
4824 		count++;
4825 		if (count > 50) {
4826 		    GROW;
4827 		    count = 0;
4828 		}
4829 		COPY_BUF(l,buf,len,cur);
4830 		NEXTL(l);
4831 		cur = CUR_CHAR(l);
4832 		if (cur == 0) {
4833 		    SHRINK;
4834 		    GROW;
4835 		    cur = CUR_CHAR(l);
4836 		}
4837 	    }
4838 	    buf[len] = 0;
4839 	    if (cur != '?') {
4840 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4841 		      "ParsePI: PI %s never end ...\n", target);
4842 	    } else {
4843 		if (input != ctxt->input) {
4844 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4845 	    "PI declaration doesn't start and stop in the same entity\n");
4846 		}
4847 		SKIP(2);
4848 
4849 #ifdef LIBXML_CATALOG_ENABLED
4850 		if (((state == XML_PARSER_MISC) ||
4851 	             (state == XML_PARSER_START)) &&
4852 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
4853 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
4854 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4855 			(allow == XML_CATA_ALLOW_ALL))
4856 			xmlParseCatalogPI(ctxt, buf);
4857 		}
4858 #endif
4859 
4860 
4861 		/*
4862 		 * SAX: PI detected.
4863 		 */
4864 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4865 		    (ctxt->sax->processingInstruction != NULL))
4866 		    ctxt->sax->processingInstruction(ctxt->userData,
4867 		                                     target, buf);
4868 	    }
4869 	    xmlFree(buf);
4870 	} else {
4871 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4872 	}
4873 	ctxt->instate = state;
4874     }
4875 }
4876 
4877 /**
4878  * xmlParseNotationDecl:
4879  * @ctxt:  an XML parser context
4880  *
4881  * parse a notation declaration
4882  *
4883  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
4884  *
4885  * Hence there is actually 3 choices:
4886  *     'PUBLIC' S PubidLiteral
4887  *     'PUBLIC' S PubidLiteral S SystemLiteral
4888  * and 'SYSTEM' S SystemLiteral
4889  *
4890  * See the NOTE on xmlParseExternalID().
4891  */
4892 
4893 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)4894 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4895     const xmlChar *name;
4896     xmlChar *Pubid;
4897     xmlChar *Systemid;
4898 
4899     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4900 	xmlParserInputPtr input = ctxt->input;
4901 	SHRINK;
4902 	SKIP(10);
4903 	if (!IS_BLANK_CH(CUR)) {
4904 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4905 			   "Space required after '<!NOTATION'\n");
4906 	    return;
4907 	}
4908 	SKIP_BLANKS;
4909 
4910         name = xmlParseName(ctxt);
4911 	if (name == NULL) {
4912 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4913 	    return;
4914 	}
4915 	if (!IS_BLANK_CH(CUR)) {
4916 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4917 		     "Space required after the NOTATION name'\n");
4918 	    return;
4919 	}
4920 	if (xmlStrchr(name, ':') != NULL) {
4921 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
4922 		     "colon are forbidden from notation names '%s'\n",
4923 		     name, NULL, NULL);
4924 	}
4925 	SKIP_BLANKS;
4926 
4927 	/*
4928 	 * Parse the IDs.
4929 	 */
4930 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4931 	SKIP_BLANKS;
4932 
4933 	if (RAW == '>') {
4934 	    if (input != ctxt->input) {
4935 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4936 	"Notation declaration doesn't start and stop in the same entity\n");
4937 	    }
4938 	    NEXT;
4939 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4940 		(ctxt->sax->notationDecl != NULL))
4941 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4942 	} else {
4943 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4944 	}
4945 	if (Systemid != NULL) xmlFree(Systemid);
4946 	if (Pubid != NULL) xmlFree(Pubid);
4947     }
4948 }
4949 
4950 /**
4951  * xmlParseEntityDecl:
4952  * @ctxt:  an XML parser context
4953  *
4954  * parse <!ENTITY declarations
4955  *
4956  * [70] EntityDecl ::= GEDecl | PEDecl
4957  *
4958  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4959  *
4960  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4961  *
4962  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4963  *
4964  * [74] PEDef ::= EntityValue | ExternalID
4965  *
4966  * [76] NDataDecl ::= S 'NDATA' S Name
4967  *
4968  * [ VC: Notation Declared ]
4969  * The Name must match the declared name of a notation.
4970  */
4971 
4972 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)4973 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4974     const xmlChar *name = NULL;
4975     xmlChar *value = NULL;
4976     xmlChar *URI = NULL, *literal = NULL;
4977     const xmlChar *ndata = NULL;
4978     int isParameter = 0;
4979     xmlChar *orig = NULL;
4980     int skipped;
4981 
4982     /* GROW; done in the caller */
4983     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4984 	xmlParserInputPtr input = ctxt->input;
4985 	SHRINK;
4986 	SKIP(8);
4987 	skipped = SKIP_BLANKS;
4988 	if (skipped == 0) {
4989 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4990 			   "Space required after '<!ENTITY'\n");
4991 	}
4992 
4993 	if (RAW == '%') {
4994 	    NEXT;
4995 	    skipped = SKIP_BLANKS;
4996 	    if (skipped == 0) {
4997 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4998 			       "Space required after '%'\n");
4999 	    }
5000 	    isParameter = 1;
5001 	}
5002 
5003         name = xmlParseName(ctxt);
5004 	if (name == NULL) {
5005 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5006 	                   "xmlParseEntityDecl: no name\n");
5007             return;
5008 	}
5009 	if (xmlStrchr(name, ':') != NULL) {
5010 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5011 		     "colon are forbidden from entities names '%s'\n",
5012 		     name, NULL, NULL);
5013 	}
5014         skipped = SKIP_BLANKS;
5015 	if (skipped == 0) {
5016 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5017 			   "Space required after the entity name\n");
5018 	}
5019 
5020 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5021 	/*
5022 	 * handle the various case of definitions...
5023 	 */
5024 	if (isParameter) {
5025 	    if ((RAW == '"') || (RAW == '\'')) {
5026 	        value = xmlParseEntityValue(ctxt, &orig);
5027 		if (value) {
5028 		    if ((ctxt->sax != NULL) &&
5029 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5030 			ctxt->sax->entityDecl(ctxt->userData, name,
5031 		                    XML_INTERNAL_PARAMETER_ENTITY,
5032 				    NULL, NULL, value);
5033 		}
5034 	    } else {
5035 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5036 		if ((URI == NULL) && (literal == NULL)) {
5037 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5038 		}
5039 		if (URI) {
5040 		    xmlURIPtr uri;
5041 
5042 		    uri = xmlParseURI((const char *) URI);
5043 		    if (uri == NULL) {
5044 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5045 				     "Invalid URI: %s\n", URI);
5046 			/*
5047 			 * This really ought to be a well formedness error
5048 			 * but the XML Core WG decided otherwise c.f. issue
5049 			 * E26 of the XML erratas.
5050 			 */
5051 		    } else {
5052 			if (uri->fragment != NULL) {
5053 			    /*
5054 			     * Okay this is foolish to block those but not
5055 			     * invalid URIs.
5056 			     */
5057 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5058 			} else {
5059 			    if ((ctxt->sax != NULL) &&
5060 				(!ctxt->disableSAX) &&
5061 				(ctxt->sax->entityDecl != NULL))
5062 				ctxt->sax->entityDecl(ctxt->userData, name,
5063 					    XML_EXTERNAL_PARAMETER_ENTITY,
5064 					    literal, URI, NULL);
5065 			}
5066 			xmlFreeURI(uri);
5067 		    }
5068 		}
5069 	    }
5070 	} else {
5071 	    if ((RAW == '"') || (RAW == '\'')) {
5072 	        value = xmlParseEntityValue(ctxt, &orig);
5073 		if ((ctxt->sax != NULL) &&
5074 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5075 		    ctxt->sax->entityDecl(ctxt->userData, name,
5076 				XML_INTERNAL_GENERAL_ENTITY,
5077 				NULL, NULL, value);
5078 		/*
5079 		 * For expat compatibility in SAX mode.
5080 		 */
5081 		if ((ctxt->myDoc == NULL) ||
5082 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5083 		    if (ctxt->myDoc == NULL) {
5084 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5085 			if (ctxt->myDoc == NULL) {
5086 			    xmlErrMemory(ctxt, "New Doc failed");
5087 			    return;
5088 			}
5089 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5090 		    }
5091 		    if (ctxt->myDoc->intSubset == NULL)
5092 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5093 					    BAD_CAST "fake", NULL, NULL);
5094 
5095 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5096 			              NULL, NULL, value);
5097 		}
5098 	    } else {
5099 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5100 		if ((URI == NULL) && (literal == NULL)) {
5101 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5102 		}
5103 		if (URI) {
5104 		    xmlURIPtr uri;
5105 
5106 		    uri = xmlParseURI((const char *)URI);
5107 		    if (uri == NULL) {
5108 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5109 				     "Invalid URI: %s\n", URI);
5110 			/*
5111 			 * This really ought to be a well formedness error
5112 			 * but the XML Core WG decided otherwise c.f. issue
5113 			 * E26 of the XML erratas.
5114 			 */
5115 		    } else {
5116 			if (uri->fragment != NULL) {
5117 			    /*
5118 			     * Okay this is foolish to block those but not
5119 			     * invalid URIs.
5120 			     */
5121 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5122 			}
5123 			xmlFreeURI(uri);
5124 		    }
5125 		}
5126 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5127 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5128 				   "Space required before 'NDATA'\n");
5129 		}
5130 		SKIP_BLANKS;
5131 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5132 		    SKIP(5);
5133 		    if (!IS_BLANK_CH(CUR)) {
5134 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5135 				       "Space required after 'NDATA'\n");
5136 		    }
5137 		    SKIP_BLANKS;
5138 		    ndata = xmlParseName(ctxt);
5139 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5140 		        (ctxt->sax->unparsedEntityDecl != NULL))
5141 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5142 				    literal, URI, ndata);
5143 		} else {
5144 		    if ((ctxt->sax != NULL) &&
5145 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5146 			ctxt->sax->entityDecl(ctxt->userData, name,
5147 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5148 				    literal, URI, NULL);
5149 		    /*
5150 		     * For expat compatibility in SAX mode.
5151 		     * assuming the entity repalcement was asked for
5152 		     */
5153 		    if ((ctxt->replaceEntities != 0) &&
5154 			((ctxt->myDoc == NULL) ||
5155 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5156 			if (ctxt->myDoc == NULL) {
5157 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5158 			    if (ctxt->myDoc == NULL) {
5159 			        xmlErrMemory(ctxt, "New Doc failed");
5160 				return;
5161 			    }
5162 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5163 			}
5164 
5165 			if (ctxt->myDoc->intSubset == NULL)
5166 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5167 						BAD_CAST "fake", NULL, NULL);
5168 			xmlSAX2EntityDecl(ctxt, name,
5169 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5170 				          literal, URI, NULL);
5171 		    }
5172 		}
5173 	    }
5174 	}
5175 	SKIP_BLANKS;
5176 	if (RAW != '>') {
5177 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5178 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5179 	} else {
5180 	    if (input != ctxt->input) {
5181 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5182 	"Entity declaration doesn't start and stop in the same entity\n");
5183 	    }
5184 	    NEXT;
5185 	}
5186 	if (orig != NULL) {
5187 	    /*
5188 	     * Ugly mechanism to save the raw entity value.
5189 	     */
5190 	    xmlEntityPtr cur = NULL;
5191 
5192 	    if (isParameter) {
5193 	        if ((ctxt->sax != NULL) &&
5194 		    (ctxt->sax->getParameterEntity != NULL))
5195 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5196 	    } else {
5197 	        if ((ctxt->sax != NULL) &&
5198 		    (ctxt->sax->getEntity != NULL))
5199 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5200 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5201 		    cur = xmlSAX2GetEntity(ctxt, name);
5202 		}
5203 	    }
5204             if (cur != NULL) {
5205 	        if (cur->orig != NULL)
5206 		    xmlFree(orig);
5207 		else
5208 		    cur->orig = orig;
5209 	    } else
5210 		xmlFree(orig);
5211 	}
5212 	if (value != NULL) xmlFree(value);
5213 	if (URI != NULL) xmlFree(URI);
5214 	if (literal != NULL) xmlFree(literal);
5215     }
5216 }
5217 
5218 /**
5219  * xmlParseDefaultDecl:
5220  * @ctxt:  an XML parser context
5221  * @value:  Receive a possible fixed default value for the attribute
5222  *
5223  * Parse an attribute default declaration
5224  *
5225  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5226  *
5227  * [ VC: Required Attribute ]
5228  * if the default declaration is the keyword #REQUIRED, then the
5229  * attribute must be specified for all elements of the type in the
5230  * attribute-list declaration.
5231  *
5232  * [ VC: Attribute Default Legal ]
5233  * The declared default value must meet the lexical constraints of
5234  * the declared attribute type c.f. xmlValidateAttributeDecl()
5235  *
5236  * [ VC: Fixed Attribute Default ]
5237  * if an attribute has a default value declared with the #FIXED
5238  * keyword, instances of that attribute must match the default value.
5239  *
5240  * [ WFC: No < in Attribute Values ]
5241  * handled in xmlParseAttValue()
5242  *
5243  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5244  *          or XML_ATTRIBUTE_FIXED.
5245  */
5246 
5247 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5248 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5249     int val;
5250     xmlChar *ret;
5251 
5252     *value = NULL;
5253     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5254 	SKIP(9);
5255 	return(XML_ATTRIBUTE_REQUIRED);
5256     }
5257     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5258 	SKIP(8);
5259 	return(XML_ATTRIBUTE_IMPLIED);
5260     }
5261     val = XML_ATTRIBUTE_NONE;
5262     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5263 	SKIP(6);
5264 	val = XML_ATTRIBUTE_FIXED;
5265 	if (!IS_BLANK_CH(CUR)) {
5266 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5267 			   "Space required after '#FIXED'\n");
5268 	}
5269 	SKIP_BLANKS;
5270     }
5271     ret = xmlParseAttValue(ctxt);
5272     ctxt->instate = XML_PARSER_DTD;
5273     if (ret == NULL) {
5274 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5275 		       "Attribute default value declaration error\n");
5276     } else
5277         *value = ret;
5278     return(val);
5279 }
5280 
5281 /**
5282  * xmlParseNotationType:
5283  * @ctxt:  an XML parser context
5284  *
5285  * parse an Notation attribute type.
5286  *
5287  * Note: the leading 'NOTATION' S part has already being parsed...
5288  *
5289  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5290  *
5291  * [ VC: Notation Attributes ]
5292  * Values of this type must match one of the notation names included
5293  * in the declaration; all notation names in the declaration must be declared.
5294  *
5295  * Returns: the notation attribute tree built while parsing
5296  */
5297 
5298 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5299 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5300     const xmlChar *name;
5301     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5302 
5303     if (RAW != '(') {
5304 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5305 	return(NULL);
5306     }
5307     SHRINK;
5308     do {
5309         NEXT;
5310 	SKIP_BLANKS;
5311         name = xmlParseName(ctxt);
5312 	if (name == NULL) {
5313 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5314 			   "Name expected in NOTATION declaration\n");
5315 	    return(ret);
5316 	}
5317 	tmp = ret;
5318 	while (tmp != NULL) {
5319 	    if (xmlStrEqual(name, tmp->name)) {
5320 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5321 	  "standalone: attribute notation value token %s duplicated\n",
5322 				 name, NULL);
5323 		if (!xmlDictOwns(ctxt->dict, name))
5324 		    xmlFree((xmlChar *) name);
5325 		break;
5326 	    }
5327 	    tmp = tmp->next;
5328 	}
5329 	if (tmp == NULL) {
5330 	    cur = xmlCreateEnumeration(name);
5331 	    if (cur == NULL) return(ret);
5332 	    if (last == NULL) ret = last = cur;
5333 	    else {
5334 		last->next = cur;
5335 		last = cur;
5336 	    }
5337 	}
5338 	SKIP_BLANKS;
5339     } while (RAW == '|');
5340     if (RAW != ')') {
5341 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5342 	if ((last != NULL) && (last != ret))
5343 	    xmlFreeEnumeration(last);
5344 	return(ret);
5345     }
5346     NEXT;
5347     return(ret);
5348 }
5349 
5350 /**
5351  * xmlParseEnumerationType:
5352  * @ctxt:  an XML parser context
5353  *
5354  * parse an Enumeration attribute type.
5355  *
5356  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5357  *
5358  * [ VC: Enumeration ]
5359  * Values of this type must match one of the Nmtoken tokens in
5360  * the declaration
5361  *
5362  * Returns: the enumeration attribute tree built while parsing
5363  */
5364 
5365 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5366 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5367     xmlChar *name;
5368     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5369 
5370     if (RAW != '(') {
5371 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5372 	return(NULL);
5373     }
5374     SHRINK;
5375     do {
5376         NEXT;
5377 	SKIP_BLANKS;
5378         name = xmlParseNmtoken(ctxt);
5379 	if (name == NULL) {
5380 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5381 	    return(ret);
5382 	}
5383 	tmp = ret;
5384 	while (tmp != NULL) {
5385 	    if (xmlStrEqual(name, tmp->name)) {
5386 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5387 	  "standalone: attribute enumeration value token %s duplicated\n",
5388 				 name, NULL);
5389 		if (!xmlDictOwns(ctxt->dict, name))
5390 		    xmlFree(name);
5391 		break;
5392 	    }
5393 	    tmp = tmp->next;
5394 	}
5395 	if (tmp == NULL) {
5396 	    cur = xmlCreateEnumeration(name);
5397 	    if (!xmlDictOwns(ctxt->dict, name))
5398 		xmlFree(name);
5399 	    if (cur == NULL) return(ret);
5400 	    if (last == NULL) ret = last = cur;
5401 	    else {
5402 		last->next = cur;
5403 		last = cur;
5404 	    }
5405 	}
5406 	SKIP_BLANKS;
5407     } while (RAW == '|');
5408     if (RAW != ')') {
5409 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5410 	return(ret);
5411     }
5412     NEXT;
5413     return(ret);
5414 }
5415 
5416 /**
5417  * xmlParseEnumeratedType:
5418  * @ctxt:  an XML parser context
5419  * @tree:  the enumeration tree built while parsing
5420  *
5421  * parse an Enumerated attribute type.
5422  *
5423  * [57] EnumeratedType ::= NotationType | Enumeration
5424  *
5425  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5426  *
5427  *
5428  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5429  */
5430 
5431 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5432 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5433     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5434 	SKIP(8);
5435 	if (!IS_BLANK_CH(CUR)) {
5436 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 			   "Space required after 'NOTATION'\n");
5438 	    return(0);
5439 	}
5440         SKIP_BLANKS;
5441 	*tree = xmlParseNotationType(ctxt);
5442 	if (*tree == NULL) return(0);
5443 	return(XML_ATTRIBUTE_NOTATION);
5444     }
5445     *tree = xmlParseEnumerationType(ctxt);
5446     if (*tree == NULL) return(0);
5447     return(XML_ATTRIBUTE_ENUMERATION);
5448 }
5449 
5450 /**
5451  * xmlParseAttributeType:
5452  * @ctxt:  an XML parser context
5453  * @tree:  the enumeration tree built while parsing
5454  *
5455  * parse the Attribute list def for an element
5456  *
5457  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5458  *
5459  * [55] StringType ::= 'CDATA'
5460  *
5461  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5462  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5463  *
5464  * Validity constraints for attribute values syntax are checked in
5465  * xmlValidateAttributeValue()
5466  *
5467  * [ VC: ID ]
5468  * Values of type ID must match the Name production. A name must not
5469  * appear more than once in an XML document as a value of this type;
5470  * i.e., ID values must uniquely identify the elements which bear them.
5471  *
5472  * [ VC: One ID per Element Type ]
5473  * No element type may have more than one ID attribute specified.
5474  *
5475  * [ VC: ID Attribute Default ]
5476  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5477  *
5478  * [ VC: IDREF ]
5479  * Values of type IDREF must match the Name production, and values
5480  * of type IDREFS must match Names; each IDREF Name must match the value
5481  * of an ID attribute on some element in the XML document; i.e. IDREF
5482  * values must match the value of some ID attribute.
5483  *
5484  * [ VC: Entity Name ]
5485  * Values of type ENTITY must match the Name production, values
5486  * of type ENTITIES must match Names; each Entity Name must match the
5487  * name of an unparsed entity declared in the DTD.
5488  *
5489  * [ VC: Name Token ]
5490  * Values of type NMTOKEN must match the Nmtoken production; values
5491  * of type NMTOKENS must match Nmtokens.
5492  *
5493  * Returns the attribute type
5494  */
5495 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5496 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5497     SHRINK;
5498     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5499 	SKIP(5);
5500 	return(XML_ATTRIBUTE_CDATA);
5501      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5502 	SKIP(6);
5503 	return(XML_ATTRIBUTE_IDREFS);
5504      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5505 	SKIP(5);
5506 	return(XML_ATTRIBUTE_IDREF);
5507      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5508         SKIP(2);
5509 	return(XML_ATTRIBUTE_ID);
5510      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5511 	SKIP(6);
5512 	return(XML_ATTRIBUTE_ENTITY);
5513      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5514 	SKIP(8);
5515 	return(XML_ATTRIBUTE_ENTITIES);
5516      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5517 	SKIP(8);
5518 	return(XML_ATTRIBUTE_NMTOKENS);
5519      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5520 	SKIP(7);
5521 	return(XML_ATTRIBUTE_NMTOKEN);
5522      }
5523      return(xmlParseEnumeratedType(ctxt, tree));
5524 }
5525 
5526 /**
5527  * xmlParseAttributeListDecl:
5528  * @ctxt:  an XML parser context
5529  *
5530  * : parse the Attribute list def for an element
5531  *
5532  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5533  *
5534  * [53] AttDef ::= S Name S AttType S DefaultDecl
5535  *
5536  */
5537 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5538 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5539     const xmlChar *elemName;
5540     const xmlChar *attrName;
5541     xmlEnumerationPtr tree;
5542 
5543     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5544 	xmlParserInputPtr input = ctxt->input;
5545 
5546 	SKIP(9);
5547 	if (!IS_BLANK_CH(CUR)) {
5548 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5549 		                 "Space required after '<!ATTLIST'\n");
5550 	}
5551         SKIP_BLANKS;
5552         elemName = xmlParseName(ctxt);
5553 	if (elemName == NULL) {
5554 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5555 			   "ATTLIST: no name for Element\n");
5556 	    return;
5557 	}
5558 	SKIP_BLANKS;
5559 	GROW;
5560 	while (RAW != '>') {
5561 	    const xmlChar *check = CUR_PTR;
5562 	    int type;
5563 	    int def;
5564 	    xmlChar *defaultValue = NULL;
5565 
5566 	    GROW;
5567             tree = NULL;
5568 	    attrName = xmlParseName(ctxt);
5569 	    if (attrName == NULL) {
5570 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5571 			       "ATTLIST: no name for Attribute\n");
5572 		break;
5573 	    }
5574 	    GROW;
5575 	    if (!IS_BLANK_CH(CUR)) {
5576 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5577 		        "Space required after the attribute name\n");
5578 		break;
5579 	    }
5580 	    SKIP_BLANKS;
5581 
5582 	    type = xmlParseAttributeType(ctxt, &tree);
5583 	    if (type <= 0) {
5584 	        break;
5585 	    }
5586 
5587 	    GROW;
5588 	    if (!IS_BLANK_CH(CUR)) {
5589 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5590 			       "Space required after the attribute type\n");
5591 	        if (tree != NULL)
5592 		    xmlFreeEnumeration(tree);
5593 		break;
5594 	    }
5595 	    SKIP_BLANKS;
5596 
5597 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5598 	    if (def <= 0) {
5599                 if (defaultValue != NULL)
5600 		    xmlFree(defaultValue);
5601 	        if (tree != NULL)
5602 		    xmlFreeEnumeration(tree);
5603 	        break;
5604 	    }
5605 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5606 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5607 
5608 	    GROW;
5609             if (RAW != '>') {
5610 		if (!IS_BLANK_CH(CUR)) {
5611 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5612 			"Space required after the attribute default value\n");
5613 		    if (defaultValue != NULL)
5614 			xmlFree(defaultValue);
5615 		    if (tree != NULL)
5616 			xmlFreeEnumeration(tree);
5617 		    break;
5618 		}
5619 		SKIP_BLANKS;
5620 	    }
5621 	    if (check == CUR_PTR) {
5622 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5623 		            "in xmlParseAttributeListDecl\n");
5624 		if (defaultValue != NULL)
5625 		    xmlFree(defaultValue);
5626 	        if (tree != NULL)
5627 		    xmlFreeEnumeration(tree);
5628 		break;
5629 	    }
5630 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5631 		(ctxt->sax->attributeDecl != NULL))
5632 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5633 	                        type, def, defaultValue, tree);
5634 	    else if (tree != NULL)
5635 		xmlFreeEnumeration(tree);
5636 
5637 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5638 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5639 		(def != XML_ATTRIBUTE_REQUIRED)) {
5640 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5641 	    }
5642 	    if (ctxt->sax2) {
5643 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5644 	    }
5645 	    if (defaultValue != NULL)
5646 	        xmlFree(defaultValue);
5647 	    GROW;
5648 	}
5649 	if (RAW == '>') {
5650 	    if (input != ctxt->input) {
5651 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5652     "Attribute list declaration doesn't start and stop in the same entity\n",
5653                                  NULL, NULL);
5654 	    }
5655 	    NEXT;
5656 	}
5657     }
5658 }
5659 
5660 /**
5661  * xmlParseElementMixedContentDecl:
5662  * @ctxt:  an XML parser context
5663  * @inputchk:  the input used for the current entity, needed for boundary checks
5664  *
5665  * parse the declaration for a Mixed Element content
5666  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5667  *
5668  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5669  *                '(' S? '#PCDATA' S? ')'
5670  *
5671  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5672  *
5673  * [ VC: No Duplicate Types ]
5674  * The same name must not appear more than once in a single
5675  * mixed-content declaration.
5676  *
5677  * returns: the list of the xmlElementContentPtr describing the element choices
5678  */
5679 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5680 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5681     xmlElementContentPtr ret = NULL, cur = NULL, n;
5682     const xmlChar *elem = NULL;
5683 
5684     GROW;
5685     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5686 	SKIP(7);
5687 	SKIP_BLANKS;
5688 	SHRINK;
5689 	if (RAW == ')') {
5690 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5691 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5692 "Element content declaration doesn't start and stop in the same entity\n",
5693                                  NULL, NULL);
5694 	    }
5695 	    NEXT;
5696 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5697 	    if (ret == NULL)
5698 	        return(NULL);
5699 	    if (RAW == '*') {
5700 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
5701 		NEXT;
5702 	    }
5703 	    return(ret);
5704 	}
5705 	if ((RAW == '(') || (RAW == '|')) {
5706 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5707 	    if (ret == NULL) return(NULL);
5708 	}
5709 	while (RAW == '|') {
5710 	    NEXT;
5711 	    if (elem == NULL) {
5712 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5713 		if (ret == NULL) return(NULL);
5714 		ret->c1 = cur;
5715 		if (cur != NULL)
5716 		    cur->parent = ret;
5717 		cur = ret;
5718 	    } else {
5719 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5720 		if (n == NULL) return(NULL);
5721 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5722 		if (n->c1 != NULL)
5723 		    n->c1->parent = n;
5724 	        cur->c2 = n;
5725 		if (n != NULL)
5726 		    n->parent = cur;
5727 		cur = n;
5728 	    }
5729 	    SKIP_BLANKS;
5730 	    elem = xmlParseName(ctxt);
5731 	    if (elem == NULL) {
5732 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5733 			"xmlParseElementMixedContentDecl : Name expected\n");
5734 		xmlFreeDocElementContent(ctxt->myDoc, cur);
5735 		return(NULL);
5736 	    }
5737 	    SKIP_BLANKS;
5738 	    GROW;
5739 	}
5740 	if ((RAW == ')') && (NXT(1) == '*')) {
5741 	    if (elem != NULL) {
5742 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5743 		                               XML_ELEMENT_CONTENT_ELEMENT);
5744 		if (cur->c2 != NULL)
5745 		    cur->c2->parent = cur;
5746             }
5747 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
5748 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5749 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5750 "Element content declaration doesn't start and stop in the same entity\n",
5751 				 NULL, NULL);
5752 	    }
5753 	    SKIP(2);
5754 	} else {
5755 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
5756 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5757 	    return(NULL);
5758 	}
5759 
5760     } else {
5761 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5762     }
5763     return(ret);
5764 }
5765 
5766 /**
5767  * xmlParseElementChildrenContentDecl:
5768  * @ctxt:  an XML parser context
5769  * @inputchk:  the input used for the current entity, needed for boundary checks
5770  *
 * parse the declaration for an element with children (element-only) content
5772  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5773  *
5774  *
5775  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5776  *
5777  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5778  *
5779  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5780  *
5781  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5782  *
5783  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5784  * TODO Parameter-entity replacement text must be properly nested
5785  *	with parenthesized groups. That is to say, if either of the
5786  *	opening or closing parentheses in a choice, seq, or Mixed
5787  *	construct is contained in the replacement text for a parameter
5788  *	entity, both must be contained in the same replacement text. For
5789  *	interoperability, if a parameter-entity reference appears in a
5790  *	choice, seq, or Mixed construct, its replacement text should not
5791  *	be empty, and neither the first nor last non-blank character of
5792  *	the replacement text should be a connector (| or ,).
5793  *
5794  * Returns the tree of xmlElementContentPtr describing the element
5795  *          hierarchy.
5796  */
5797 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5798 xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5799     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5800     const xmlChar *elem;
5801     xmlChar type = 0;
5802 
5803     SKIP_BLANKS;
5804     GROW;
5805     if (RAW == '(') {
5806 	int inputid = ctxt->input->id;
5807 
5808         /* Recurse on first child */
5809 	NEXT;
5810 	SKIP_BLANKS;
5811         cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5812 	SKIP_BLANKS;
5813 	GROW;
5814     } else {
5815 	elem = xmlParseName(ctxt);
5816 	if (elem == NULL) {
5817 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5818 	    return(NULL);
5819 	}
5820         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5821 	if (cur == NULL) {
5822 	    xmlErrMemory(ctxt, NULL);
5823 	    return(NULL);
5824 	}
5825 	GROW;
5826 	if (RAW == '?') {
5827 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
5828 	    NEXT;
5829 	} else if (RAW == '*') {
5830 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
5831 	    NEXT;
5832 	} else if (RAW == '+') {
5833 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5834 	    NEXT;
5835 	} else {
5836 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5837 	}
5838 	GROW;
5839     }
5840     SKIP_BLANKS;
5841     SHRINK;
5842     while (RAW != ')') {
5843         /*
5844 	 * Each loop we parse one separator and one element.
5845 	 */
5846         if (RAW == ',') {
5847 	    if (type == 0) type = CUR;
5848 
5849 	    /*
5850 	     * Detect "Name | Name , Name" error
5851 	     */
5852 	    else if (type != CUR) {
5853 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5854 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
5855 		                  type);
5856 		if ((last != NULL) && (last != ret))
5857 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5858 		if (ret != NULL)
5859 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5860 		return(NULL);
5861 	    }
5862 	    NEXT;
5863 
5864 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5865 	    if (op == NULL) {
5866 		if ((last != NULL) && (last != ret))
5867 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5868 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
5869 		return(NULL);
5870 	    }
5871 	    if (last == NULL) {
5872 		op->c1 = ret;
5873 		if (ret != NULL)
5874 		    ret->parent = op;
5875 		ret = cur = op;
5876 	    } else {
5877 	        cur->c2 = op;
5878 		if (op != NULL)
5879 		    op->parent = cur;
5880 		op->c1 = last;
5881 		if (last != NULL)
5882 		    last->parent = op;
5883 		cur =op;
5884 		last = NULL;
5885 	    }
5886 	} else if (RAW == '|') {
5887 	    if (type == 0) type = CUR;
5888 
5889 	    /*
5890 	     * Detect "Name , Name | Name" error
5891 	     */
5892 	    else if (type != CUR) {
5893 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5894 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
5895 				  type);
5896 		if ((last != NULL) && (last != ret))
5897 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5898 		if (ret != NULL)
5899 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5900 		return(NULL);
5901 	    }
5902 	    NEXT;
5903 
5904 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5905 	    if (op == NULL) {
5906 		if ((last != NULL) && (last != ret))
5907 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5908 		if (ret != NULL)
5909 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5910 		return(NULL);
5911 	    }
5912 	    if (last == NULL) {
5913 		op->c1 = ret;
5914 		if (ret != NULL)
5915 		    ret->parent = op;
5916 		ret = cur = op;
5917 	    } else {
5918 	        cur->c2 = op;
5919 		if (op != NULL)
5920 		    op->parent = cur;
5921 		op->c1 = last;
5922 		if (last != NULL)
5923 		    last->parent = op;
5924 		cur =op;
5925 		last = NULL;
5926 	    }
5927 	} else {
5928 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5929 	    if ((last != NULL) && (last != ret))
5930 	        xmlFreeDocElementContent(ctxt->myDoc, last);
5931 	    if (ret != NULL)
5932 		xmlFreeDocElementContent(ctxt->myDoc, ret);
5933 	    return(NULL);
5934 	}
5935 	GROW;
5936 	SKIP_BLANKS;
5937 	GROW;
5938 	if (RAW == '(') {
5939 	    int inputid = ctxt->input->id;
5940 	    /* Recurse on second child */
5941 	    NEXT;
5942 	    SKIP_BLANKS;
5943 	    last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5944 	    SKIP_BLANKS;
5945 	} else {
5946 	    elem = xmlParseName(ctxt);
5947 	    if (elem == NULL) {
5948 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5949 		if (ret != NULL)
5950 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5951 		return(NULL);
5952 	    }
5953 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5954 	    if (last == NULL) {
5955 		if (ret != NULL)
5956 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5957 		return(NULL);
5958 	    }
5959 	    if (RAW == '?') {
5960 		last->ocur = XML_ELEMENT_CONTENT_OPT;
5961 		NEXT;
5962 	    } else if (RAW == '*') {
5963 		last->ocur = XML_ELEMENT_CONTENT_MULT;
5964 		NEXT;
5965 	    } else if (RAW == '+') {
5966 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
5967 		NEXT;
5968 	    } else {
5969 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
5970 	    }
5971 	}
5972 	SKIP_BLANKS;
5973 	GROW;
5974     }
5975     if ((cur != NULL) && (last != NULL)) {
5976         cur->c2 = last;
5977 	if (last != NULL)
5978 	    last->parent = cur;
5979     }
5980     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5981 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5982 "Element content declaration doesn't start and stop in the same entity\n",
5983 			 NULL, NULL);
5984     }
5985     NEXT;
5986     if (RAW == '?') {
5987 	if (ret != NULL) {
5988 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5989 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5990 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
5991 	    else
5992 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
5993 	}
5994 	NEXT;
5995     } else if (RAW == '*') {
5996 	if (ret != NULL) {
5997 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
5998 	    cur = ret;
5999 	    /*
6000 	     * Some normalization:
6001 	     * (a | b* | c?)* == (a | b | c)*
6002 	     */
6003 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6004 		if ((cur->c1 != NULL) &&
6005 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6006 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6007 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6008 		if ((cur->c2 != NULL) &&
6009 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6010 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6011 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6012 		cur = cur->c2;
6013 	    }
6014 	}
6015 	NEXT;
6016     } else if (RAW == '+') {
6017 	if (ret != NULL) {
6018 	    int found = 0;
6019 
6020 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6021 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6022 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6023 	    else
6024 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6025 	    /*
6026 	     * Some normalization:
6027 	     * (a | b*)+ == (a | b)*
6028 	     * (a | b?)+ == (a | b)*
6029 	     */
6030 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6031 		if ((cur->c1 != NULL) &&
6032 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6033 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6034 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6035 		    found = 1;
6036 		}
6037 		if ((cur->c2 != NULL) &&
6038 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6039 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6040 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6041 		    found = 1;
6042 		}
6043 		cur = cur->c2;
6044 	    }
6045 	    if (found)
6046 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6047 	}
6048 	NEXT;
6049     }
6050     return(ret);
6051 }
6052 
6053 /**
6054  * xmlParseElementContentDecl:
6055  * @ctxt:  an XML parser context
6056  * @name:  the name of the element being defined.
6057  * @result:  the Element Content pointer will be stored here if any
6058  *
6059  * parse the declaration for an Element content either Mixed or Children,
6060  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6061  *
6062  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6063  *
6064  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6065  */
6066 
6067 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6068 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6069                            xmlElementContentPtr *result) {
6070 
6071     xmlElementContentPtr tree = NULL;
6072     int inputid = ctxt->input->id;
6073     int res;
6074 
6075     *result = NULL;
6076 
6077     if (RAW != '(') {
6078 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6079 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6080 	return(-1);
6081     }
6082     NEXT;
6083     GROW;
6084     SKIP_BLANKS;
6085     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6086         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6087 	res = XML_ELEMENT_TYPE_MIXED;
6088     } else {
6089         tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
6090 	res = XML_ELEMENT_TYPE_ELEMENT;
6091     }
6092     SKIP_BLANKS;
6093     *result = tree;
6094     return(res);
6095 }
6096 
6097 /**
6098  * xmlParseElementDecl:
6099  * @ctxt:  an XML parser context
6100  *
6101  * parse an Element declaration.
6102  *
6103  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6104  *
6105  * [ VC: Unique Element Type Declaration ]
6106  * No element type may be declared more than once
6107  *
6108  * Returns the type of the element, or -1 in case of error
6109  */
6110 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6111 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6112     const xmlChar *name;
6113     int ret = -1;
6114     xmlElementContentPtr content  = NULL;
6115 
6116     /* GROW; done in the caller */
6117     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6118 	xmlParserInputPtr input = ctxt->input;
6119 
6120 	SKIP(9);
6121 	if (!IS_BLANK_CH(CUR)) {
6122 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6123 		           "Space required after 'ELEMENT'\n");
6124 	}
6125         SKIP_BLANKS;
6126         name = xmlParseName(ctxt);
6127 	if (name == NULL) {
6128 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6129 			   "xmlParseElementDecl: no name for Element\n");
6130 	    return(-1);
6131 	}
6132 	while ((RAW == 0) && (ctxt->inputNr > 1))
6133 	    xmlPopInput(ctxt);
6134 	if (!IS_BLANK_CH(CUR)) {
6135 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6136 			   "Space required after the element name\n");
6137 	}
6138         SKIP_BLANKS;
6139 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6140 	    SKIP(5);
6141 	    /*
6142 	     * Element must always be empty.
6143 	     */
6144 	    ret = XML_ELEMENT_TYPE_EMPTY;
6145 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6146 	           (NXT(2) == 'Y')) {
6147 	    SKIP(3);
6148 	    /*
6149 	     * Element is a generic container.
6150 	     */
6151 	    ret = XML_ELEMENT_TYPE_ANY;
6152 	} else if (RAW == '(') {
6153 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6154 	} else {
6155 	    /*
6156 	     * [ WFC: PEs in Internal Subset ] error handling.
6157 	     */
6158 	    if ((RAW == '%') && (ctxt->external == 0) &&
6159 	        (ctxt->inputNr == 1)) {
6160 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6161 	  "PEReference: forbidden within markup decl in internal subset\n");
6162 	    } else {
6163 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6164 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6165             }
6166 	    return(-1);
6167 	}
6168 
6169 	SKIP_BLANKS;
6170 	/*
6171 	 * Pop-up of finished entities.
6172 	 */
6173 	while ((RAW == 0) && (ctxt->inputNr > 1))
6174 	    xmlPopInput(ctxt);
6175 	SKIP_BLANKS;
6176 
6177 	if (RAW != '>') {
6178 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6179 	    if (content != NULL) {
6180 		xmlFreeDocElementContent(ctxt->myDoc, content);
6181 	    }
6182 	} else {
6183 	    if (input != ctxt->input) {
6184 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6185     "Element declaration doesn't start and stop in the same entity\n");
6186 	    }
6187 
6188 	    NEXT;
6189 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6190 		(ctxt->sax->elementDecl != NULL)) {
6191 		if (content != NULL)
6192 		    content->parent = NULL;
6193 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6194 		                       content);
6195 		if ((content != NULL) && (content->parent == NULL)) {
6196 		    /*
6197 		     * this is a trick: if xmlAddElementDecl is called,
6198 		     * instead of copying the full tree it is plugged directly
6199 		     * if called from the parser. Avoid duplicating the
6200 		     * interfaces or change the API/ABI
6201 		     */
6202 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6203 		}
6204 	    } else if (content != NULL) {
6205 		xmlFreeDocElementContent(ctxt->myDoc, content);
6206 	    }
6207 	}
6208     }
6209     return(ret);
6210 }
6211 
6212 /**
6213  * xmlParseConditionalSections
6214  * @ctxt:  an XML parser context
6215  *
6216  * [61] conditionalSect ::= includeSect | ignoreSect
6217  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6218  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6219  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6220  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6221  */
6222 
6223 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6224 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6225     int id = ctxt->input->id;
6226 
6227     SKIP(3);
6228     SKIP_BLANKS;
6229     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6230 	SKIP(7);
6231 	SKIP_BLANKS;
6232 	if (RAW != '[') {
6233 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6234 	} else {
6235 	    if (ctxt->input->id != id) {
6236 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6237 	    "All markup of the conditional section is not in the same entity\n",
6238 				     NULL, NULL);
6239 	    }
6240 	    NEXT;
6241 	}
6242 	if (xmlParserDebugEntities) {
6243 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6244 		xmlGenericError(xmlGenericErrorContext,
6245 			"%s(%d): ", ctxt->input->filename,
6246 			ctxt->input->line);
6247 	    xmlGenericError(xmlGenericErrorContext,
6248 		    "Entering INCLUDE Conditional Section\n");
6249 	}
6250 
6251 	while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6252 	       (NXT(2) != '>'))) {
6253 	    const xmlChar *check = CUR_PTR;
6254 	    unsigned int cons = ctxt->input->consumed;
6255 
6256 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6257 		xmlParseConditionalSections(ctxt);
6258 	    } else if (IS_BLANK_CH(CUR)) {
6259 		NEXT;
6260 	    } else if (RAW == '%') {
6261 		xmlParsePEReference(ctxt);
6262 	    } else
6263 		xmlParseMarkupDecl(ctxt);
6264 
6265 	    /*
6266 	     * Pop-up of finished entities.
6267 	     */
6268 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6269 		xmlPopInput(ctxt);
6270 
6271 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6272 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6273 		break;
6274 	    }
6275 	}
6276 	if (xmlParserDebugEntities) {
6277 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6278 		xmlGenericError(xmlGenericErrorContext,
6279 			"%s(%d): ", ctxt->input->filename,
6280 			ctxt->input->line);
6281 	    xmlGenericError(xmlGenericErrorContext,
6282 		    "Leaving INCLUDE Conditional Section\n");
6283 	}
6284 
6285     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6286 	int state;
6287 	xmlParserInputState instate;
6288 	int depth = 0;
6289 
6290 	SKIP(6);
6291 	SKIP_BLANKS;
6292 	if (RAW != '[') {
6293 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6294 	} else {
6295 	    if (ctxt->input->id != id) {
6296 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6297 	    "All markup of the conditional section is not in the same entity\n",
6298 				     NULL, NULL);
6299 	    }
6300 	    NEXT;
6301 	}
6302 	if (xmlParserDebugEntities) {
6303 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6304 		xmlGenericError(xmlGenericErrorContext,
6305 			"%s(%d): ", ctxt->input->filename,
6306 			ctxt->input->line);
6307 	    xmlGenericError(xmlGenericErrorContext,
6308 		    "Entering IGNORE Conditional Section\n");
6309 	}
6310 
6311 	/*
6312 	 * Parse up to the end of the conditional section
6313 	 * But disable SAX event generating DTD building in the meantime
6314 	 */
6315 	state = ctxt->disableSAX;
6316 	instate = ctxt->instate;
6317 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6318 	ctxt->instate = XML_PARSER_IGNORE;
6319 
6320 	while ((depth >= 0) && (RAW != 0)) {
6321 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6322 	    depth++;
6323 	    SKIP(3);
6324 	    continue;
6325 	  }
6326 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6327 	    if (--depth >= 0) SKIP(3);
6328 	    continue;
6329 	  }
6330 	  NEXT;
6331 	  continue;
6332 	}
6333 
6334 	ctxt->disableSAX = state;
6335 	ctxt->instate = instate;
6336 
6337 	if (xmlParserDebugEntities) {
6338 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6339 		xmlGenericError(xmlGenericErrorContext,
6340 			"%s(%d): ", ctxt->input->filename,
6341 			ctxt->input->line);
6342 	    xmlGenericError(xmlGenericErrorContext,
6343 		    "Leaving IGNORE Conditional Section\n");
6344 	}
6345 
6346     } else {
6347 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6348     }
6349 
6350     if (RAW == 0)
6351         SHRINK;
6352 
6353     if (RAW == 0) {
6354 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6355     } else {
6356 	if (ctxt->input->id != id) {
6357 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6358 	"All markup of the conditional section is not in the same entity\n",
6359 				 NULL, NULL);
6360 	}
6361         SKIP(3);
6362     }
6363 }
6364 
6365 /**
6366  * xmlParseMarkupDecl:
6367  * @ctxt:  an XML parser context
6368  *
6369  * parse Markup declarations
6370  *
6371  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6372  *                     NotationDecl | PI | Comment
6373  *
6374  * [ VC: Proper Declaration/PE Nesting ]
6375  * Parameter-entity replacement text must be properly nested with
6376  * markup declarations. That is to say, if either the first character
6377  * or the last character of a markup declaration (markupdecl above) is
6378  * contained in the replacement text for a parameter-entity reference,
6379  * both must be contained in the same replacement text.
6380  *
6381  * [ WFC: PEs in Internal Subset ]
6382  * In the internal DTD subset, parameter-entity references can occur
6383  * only where markup declarations can occur, not within markup declarations.
6384  * (This does not apply to references that occur in external parameter
6385  * entities or to the external subset.)
6386  */
6387 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6388 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6389     GROW;
6390     if (CUR == '<') {
6391         if (NXT(1) == '!') {
6392 	    switch (NXT(2)) {
6393 	        case 'E':
6394 		    if (NXT(3) == 'L')
6395 			xmlParseElementDecl(ctxt);
6396 		    else if (NXT(3) == 'N')
6397 			xmlParseEntityDecl(ctxt);
6398 		    break;
6399 	        case 'A':
6400 		    xmlParseAttributeListDecl(ctxt);
6401 		    break;
6402 	        case 'N':
6403 		    xmlParseNotationDecl(ctxt);
6404 		    break;
6405 	        case '-':
6406 		    xmlParseComment(ctxt);
6407 		    break;
6408 		default:
6409 		    /* there is an error but it will be detected later */
6410 		    break;
6411 	    }
6412 	} else if (NXT(1) == '?') {
6413 	    xmlParsePI(ctxt);
6414 	}
6415     }
6416     /*
6417      * This is only for internal subset. On external entities,
6418      * the replacement is done before parsing stage
6419      */
6420     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6421 	xmlParsePEReference(ctxt);
6422 
6423     /*
6424      * Conditional sections are allowed from entities included
6425      * by PE References in the internal subset.
6426      */
6427     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6428         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6429 	    xmlParseConditionalSections(ctxt);
6430 	}
6431     }
6432 
6433     ctxt->instate = XML_PARSER_DTD;
6434 }
6435 
6436 /**
6437  * xmlParseTextDecl:
6438  * @ctxt:  an XML parser context
6439  *
6440  * parse an XML declaration header for external entities
6441  *
6442  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6443  */
6444 
6445 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6446 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6447     xmlChar *version;
6448     const xmlChar *encoding;
6449 
6450     /*
6451      * We know that '<?xml' is here.
6452      */
6453     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6454 	SKIP(5);
6455     } else {
6456 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6457 	return;
6458     }
6459 
6460     if (!IS_BLANK_CH(CUR)) {
6461 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6462 		       "Space needed after '<?xml'\n");
6463     }
6464     SKIP_BLANKS;
6465 
6466     /*
6467      * We may have the VersionInfo here.
6468      */
6469     version = xmlParseVersionInfo(ctxt);
6470     if (version == NULL)
6471 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6472     else {
6473 	if (!IS_BLANK_CH(CUR)) {
6474 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6475 		           "Space needed here\n");
6476 	}
6477     }
6478     ctxt->input->version = version;
6479 
6480     /*
6481      * We must have the encoding declaration
6482      */
6483     encoding = xmlParseEncodingDecl(ctxt);
6484     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6485 	/*
6486 	 * The XML REC instructs us to stop parsing right here
6487 	 */
6488         return;
6489     }
6490     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6491 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6492 		       "Missing encoding in text declaration\n");
6493     }
6494 
6495     SKIP_BLANKS;
6496     if ((RAW == '?') && (NXT(1) == '>')) {
6497         SKIP(2);
6498     } else if (RAW == '>') {
6499         /* Deprecated old WD ... */
6500 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6501 	NEXT;
6502     } else {
6503 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6504 	MOVETO_ENDTAG(CUR_PTR);
6505 	NEXT;
6506     }
6507 }
6508 
6509 /**
6510  * xmlParseExternalSubset:
6511  * @ctxt:  an XML parser context
6512  * @ExternalID: the external identifier
6513  * @SystemID: the system identifier (or URL)
6514  *
6515  * parse Markup declarations from an external subset
6516  *
6517  * [30] extSubset ::= textDecl? extSubsetDecl
6518  *
6519  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6520  */
6521 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6522 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6523                        const xmlChar *SystemID) {
6524     xmlDetectSAX2(ctxt);
6525     GROW;
6526 
6527     if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6528         (ctxt->input->end - ctxt->input->cur >= 4)) {
6529         xmlChar start[4];
6530 	xmlCharEncoding enc;
6531 
6532 	start[0] = RAW;
6533 	start[1] = NXT(1);
6534 	start[2] = NXT(2);
6535 	start[3] = NXT(3);
6536 	enc = xmlDetectCharEncoding(start, 4);
6537 	if (enc != XML_CHAR_ENCODING_NONE)
6538 	    xmlSwitchEncoding(ctxt, enc);
6539     }
6540 
6541     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6542 	xmlParseTextDecl(ctxt);
6543 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6544 	    /*
6545 	     * The XML REC instructs us to stop parsing right here
6546 	     */
6547 	    ctxt->instate = XML_PARSER_EOF;
6548 	    return;
6549 	}
6550     }
6551     if (ctxt->myDoc == NULL) {
6552         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6553 	if (ctxt->myDoc == NULL) {
6554 	    xmlErrMemory(ctxt, "New Doc failed");
6555 	    return;
6556 	}
6557 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6558     }
6559     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6560         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6561 
6562     ctxt->instate = XML_PARSER_DTD;
6563     ctxt->external = 1;
6564     while (((RAW == '<') && (NXT(1) == '?')) ||
6565            ((RAW == '<') && (NXT(1) == '!')) ||
6566 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
6567 	const xmlChar *check = CUR_PTR;
6568 	unsigned int cons = ctxt->input->consumed;
6569 
6570 	GROW;
6571         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6572 	    xmlParseConditionalSections(ctxt);
6573 	} else if (IS_BLANK_CH(CUR)) {
6574 	    NEXT;
6575 	} else if (RAW == '%') {
6576             xmlParsePEReference(ctxt);
6577 	} else
6578 	    xmlParseMarkupDecl(ctxt);
6579 
6580 	/*
6581 	 * Pop-up of finished entities.
6582 	 */
6583 	while ((RAW == 0) && (ctxt->inputNr > 1))
6584 	    xmlPopInput(ctxt);
6585 
6586 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6587 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6588 	    break;
6589 	}
6590     }
6591 
6592     if (RAW != 0) {
6593 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6594     }
6595 
6596 }
6597 
6598 /**
6599  * xmlParseReference:
6600  * @ctxt:  an XML parser context
6601  *
6602  * parse and handle entity references in content, depending on the SAX
6603  * interface, this may end-up in a call to character() if this is a
6604  * CharRef, a predefined entity, if there is no reference() callback.
6605  * or if the parser was asked to switch to that mode.
6606  *
6607  * [67] Reference ::= EntityRef | CharRef
6608  */
6609 void
xmlParseReference(xmlParserCtxtPtr ctxt)6610 xmlParseReference(xmlParserCtxtPtr ctxt) {
6611     xmlEntityPtr ent;
6612     xmlChar *val;
6613     int was_checked;
6614     xmlNodePtr list = NULL;
6615     xmlParserErrors ret = XML_ERR_OK;
6616 
6617 
6618     if (RAW != '&')
6619         return;
6620 
6621     /*
6622      * Simple case of a CharRef
6623      */
6624     if (NXT(1) == '#') {
6625 	int i = 0;
6626 	xmlChar out[10];
6627 	int hex = NXT(2);
6628 	int value = xmlParseCharRef(ctxt);
6629 
6630 	if (value == 0)
6631 	    return;
6632 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6633 	    /*
6634 	     * So we are using non-UTF-8 buffers
6635 	     * Check that the char fit on 8bits, if not
6636 	     * generate a CharRef.
6637 	     */
6638 	    if (value <= 0xFF) {
6639 		out[0] = value;
6640 		out[1] = 0;
6641 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6642 		    (!ctxt->disableSAX))
6643 		    ctxt->sax->characters(ctxt->userData, out, 1);
6644 	    } else {
6645 		if ((hex == 'x') || (hex == 'X'))
6646 		    snprintf((char *)out, sizeof(out), "#x%X", value);
6647 		else
6648 		    snprintf((char *)out, sizeof(out), "#%d", value);
6649 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6650 		    (!ctxt->disableSAX))
6651 		    ctxt->sax->reference(ctxt->userData, out);
6652 	    }
6653 	} else {
6654 	    /*
6655 	     * Just encode the value in UTF-8
6656 	     */
6657 	    COPY_BUF(0 ,out, i, value);
6658 	    out[i] = 0;
6659 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6660 		(!ctxt->disableSAX))
6661 		ctxt->sax->characters(ctxt->userData, out, i);
6662 	}
6663 	return;
6664     }
6665 
6666     /*
6667      * We are seeing an entity reference
6668      */
6669     ent = xmlParseEntityRef(ctxt);
6670     if (ent == NULL) return;
6671     if (!ctxt->wellFormed)
6672 	return;
6673     was_checked = ent->checked;
6674 
6675     /* special case of predefined entities */
6676     if ((ent->name == NULL) ||
6677         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6678 	val = ent->content;
6679 	if (val == NULL) return;
6680 	/*
6681 	 * inline the entity.
6682 	 */
6683 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6684 	    (!ctxt->disableSAX))
6685 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6686 	return;
6687     }
6688 
6689     /*
6690      * The first reference to the entity trigger a parsing phase
6691      * where the ent->children is filled with the result from
6692      * the parsing.
6693      */
6694     if (ent->checked == 0) {
6695 	unsigned long oldnbent = ctxt->nbentities;
6696 
6697 	/*
6698 	 * This is a bit hackish but this seems the best
6699 	 * way to make sure both SAX and DOM entity support
6700 	 * behaves okay.
6701 	 */
6702 	void *user_data;
6703 	if (ctxt->userData == ctxt)
6704 	    user_data = NULL;
6705 	else
6706 	    user_data = ctxt->userData;
6707 
6708 	/*
6709 	 * Check that this entity is well formed
6710 	 * 4.3.2: An internal general parsed entity is well-formed
6711 	 * if its replacement text matches the production labeled
6712 	 * content.
6713 	 */
6714 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6715 	    ctxt->depth++;
6716 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6717 	                                              user_data, &list);
6718 	    ctxt->depth--;
6719 
6720 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6721 	    ctxt->depth++;
6722 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6723 	                                   user_data, ctxt->depth, ent->URI,
6724 					   ent->ExternalID, &list);
6725 	    ctxt->depth--;
6726 	} else {
6727 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
6728 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6729 			 "invalid entity type found\n", NULL);
6730 	}
6731 
6732 	/*
6733 	 * Store the number of entities needing parsing for this entity
6734 	 * content and do checkings
6735 	 */
6736 	ent->checked = ctxt->nbentities - oldnbent;
6737 	if (ret == XML_ERR_ENTITY_LOOP) {
6738 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6739 	    xmlFreeNodeList(list);
6740 	    return;
6741 	}
6742 	if (xmlParserEntityCheck(ctxt, 0, ent)) {
6743 	    xmlFreeNodeList(list);
6744 	    return;
6745 	}
6746 
6747 	if ((ret == XML_ERR_OK) && (list != NULL)) {
6748 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6749 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6750 		(ent->children == NULL)) {
6751 		ent->children = list;
6752 		if (ctxt->replaceEntities) {
6753 		    /*
6754 		     * Prune it directly in the generated document
6755 		     * except for single text nodes.
6756 		     */
6757 		    if (((list->type == XML_TEXT_NODE) &&
6758 			 (list->next == NULL)) ||
6759 			(ctxt->parseMode == XML_PARSE_READER)) {
6760 			list->parent = (xmlNodePtr) ent;
6761 			list = NULL;
6762 			ent->owner = 1;
6763 		    } else {
6764 			ent->owner = 0;
6765 			while (list != NULL) {
6766 			    list->parent = (xmlNodePtr) ctxt->node;
6767 			    list->doc = ctxt->myDoc;
6768 			    if (list->next == NULL)
6769 				ent->last = list;
6770 			    list = list->next;
6771 			}
6772 			list = ent->children;
6773 #ifdef LIBXML_LEGACY_ENABLED
6774 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6775 			  xmlAddEntityReference(ent, list, NULL);
6776 #endif /* LIBXML_LEGACY_ENABLED */
6777 		    }
6778 		} else {
6779 		    ent->owner = 1;
6780 		    while (list != NULL) {
6781 			list->parent = (xmlNodePtr) ent;
6782 			if (list->next == NULL)
6783 			    ent->last = list;
6784 			list = list->next;
6785 		    }
6786 		}
6787 	    } else {
6788 		xmlFreeNodeList(list);
6789 		list = NULL;
6790 	    }
6791 	} else if ((ret != XML_ERR_OK) &&
6792 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
6793 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6794 		     "Entity '%s' failed to parse\n", ent->name);
6795 	} else if (list != NULL) {
6796 	    xmlFreeNodeList(list);
6797 	    list = NULL;
6798 	}
6799 	if (ent->checked == 0)
6800 	    ent->checked = 1;
6801     } else if (ent->checked != 1) {
6802 	ctxt->nbentities += ent->checked;
6803     }
6804 
6805     /*
6806      * Now that the entity content has been gathered
6807      * provide it to the application, this can take different forms based
6808      * on the parsing modes.
6809      */
6810     if (ent->children == NULL) {
6811 	/*
6812 	 * Probably running in SAX mode and the callbacks don't
6813 	 * build the entity content. So unless we already went
6814 	 * though parsing for first checking go though the entity
6815 	 * content to generate callbacks associated to the entity
6816 	 */
6817 	if (was_checked != 0) {
6818 	    void *user_data;
6819 	    /*
6820 	     * This is a bit hackish but this seems the best
6821 	     * way to make sure both SAX and DOM entity support
6822 	     * behaves okay.
6823 	     */
6824 	    if (ctxt->userData == ctxt)
6825 		user_data = NULL;
6826 	    else
6827 		user_data = ctxt->userData;
6828 
6829 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6830 		ctxt->depth++;
6831 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6832 				   ent->content, user_data, NULL);
6833 		ctxt->depth--;
6834 	    } else if (ent->etype ==
6835 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6836 		ctxt->depth++;
6837 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6838 			   ctxt->sax, user_data, ctxt->depth,
6839 			   ent->URI, ent->ExternalID, NULL);
6840 		ctxt->depth--;
6841 	    } else {
6842 		ret = XML_ERR_ENTITY_PE_INTERNAL;
6843 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6844 			     "invalid entity type found\n", NULL);
6845 	    }
6846 	    if (ret == XML_ERR_ENTITY_LOOP) {
6847 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6848 		return;
6849 	    }
6850 	}
6851 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6852 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6853 	    /*
6854 	     * Entity reference callback comes second, it's somewhat
6855 	     * superfluous but a compatibility to historical behaviour
6856 	     */
6857 	    ctxt->sax->reference(ctxt->userData, ent->name);
6858 	}
6859 	return;
6860     }
6861 
6862     /*
6863      * If we didn't get any children for the entity being built
6864      */
6865     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6866 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6867 	/*
6868 	 * Create a node.
6869 	 */
6870 	ctxt->sax->reference(ctxt->userData, ent->name);
6871 	return;
6872     }
6873 
6874     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
6875 	/*
6876 	 * There is a problem on the handling of _private for entities
6877 	 * (bug 155816): Should we copy the content of the field from
6878 	 * the entity (possibly overwriting some value set by the user
6879 	 * when a copy is created), should we leave it alone, or should
6880 	 * we try to take care of different situations?  The problem
6881 	 * is exacerbated by the usage of this field by the xmlReader.
6882 	 * To fix this bug, we look at _private on the created node
6883 	 * and, if it's NULL, we copy in whatever was in the entity.
6884 	 * If it's not NULL we leave it alone.  This is somewhat of a
6885 	 * hack - maybe we should have further tests to determine
6886 	 * what to do.
6887 	 */
6888 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
6889 	    /*
6890 	     * Seems we are generating the DOM content, do
6891 	     * a simple tree copy for all references except the first
6892 	     * In the first occurrence list contains the replacement.
6893 	     * progressive == 2 means we are operating on the Reader
6894 	     * and since nodes are discarded we must copy all the time.
6895 	     */
6896 	    if (((list == NULL) && (ent->owner == 0)) ||
6897 		(ctxt->parseMode == XML_PARSE_READER)) {
6898 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
6899 
6900 		/*
6901 		 * when operating on a reader, the entities definitions
6902 		 * are always owning the entities subtree.
6903 		if (ctxt->parseMode == XML_PARSE_READER)
6904 		    ent->owner = 1;
6905 		 */
6906 
6907 		cur = ent->children;
6908 		while (cur != NULL) {
6909 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6910 		    if (nw != NULL) {
6911 			if (nw->_private == NULL)
6912 			    nw->_private = cur->_private;
6913 			if (firstChild == NULL){
6914 			    firstChild = nw;
6915 			}
6916 			nw = xmlAddChild(ctxt->node, nw);
6917 		    }
6918 		    if (cur == ent->last) {
6919 			/*
6920 			 * needed to detect some strange empty
6921 			 * node cases in the reader tests
6922 			 */
6923 			if ((ctxt->parseMode == XML_PARSE_READER) &&
6924 			    (nw != NULL) &&
6925 			    (nw->type == XML_ELEMENT_NODE) &&
6926 			    (nw->children == NULL))
6927 			    nw->extra = 1;
6928 
6929 			break;
6930 		    }
6931 		    cur = cur->next;
6932 		}
6933 #ifdef LIBXML_LEGACY_ENABLED
6934 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6935 		  xmlAddEntityReference(ent, firstChild, nw);
6936 #endif /* LIBXML_LEGACY_ENABLED */
6937 	    } else if (list == NULL) {
6938 		xmlNodePtr nw = NULL, cur, next, last,
6939 			   firstChild = NULL;
6940 		/*
6941 		 * Copy the entity child list and make it the new
6942 		 * entity child list. The goal is to make sure any
6943 		 * ID or REF referenced will be the one from the
6944 		 * document content and not the entity copy.
6945 		 */
6946 		cur = ent->children;
6947 		ent->children = NULL;
6948 		last = ent->last;
6949 		ent->last = NULL;
6950 		while (cur != NULL) {
6951 		    next = cur->next;
6952 		    cur->next = NULL;
6953 		    cur->parent = NULL;
6954 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6955 		    if (nw != NULL) {
6956 			if (nw->_private == NULL)
6957 			    nw->_private = cur->_private;
6958 			if (firstChild == NULL){
6959 			    firstChild = cur;
6960 			}
6961 			xmlAddChild((xmlNodePtr) ent, nw);
6962 			xmlAddChild(ctxt->node, cur);
6963 		    }
6964 		    if (cur == last)
6965 			break;
6966 		    cur = next;
6967 		}
6968 		if (ent->owner == 0)
6969 		    ent->owner = 1;
6970 #ifdef LIBXML_LEGACY_ENABLED
6971 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6972 		  xmlAddEntityReference(ent, firstChild, nw);
6973 #endif /* LIBXML_LEGACY_ENABLED */
6974 	    } else {
6975 		const xmlChar *nbktext;
6976 
6977 		/*
6978 		 * the name change is to avoid coalescing of the
6979 		 * node with a possible previous text one which
6980 		 * would make ent->children a dangling pointer
6981 		 */
6982 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6983 					-1);
6984 		if (ent->children->type == XML_TEXT_NODE)
6985 		    ent->children->name = nbktext;
6986 		if ((ent->last != ent->children) &&
6987 		    (ent->last->type == XML_TEXT_NODE))
6988 		    ent->last->name = nbktext;
6989 		xmlAddChildList(ctxt->node, ent->children);
6990 	    }
6991 
6992 	    /*
6993 	     * This is to avoid a nasty side effect, see
6994 	     * characters() in SAX.c
6995 	     */
6996 	    ctxt->nodemem = 0;
6997 	    ctxt->nodelen = 0;
6998 	    return;
6999 	}
7000     }
7001 }
7002 
7003 /**
7004  * xmlParseEntityRef:
7005  * @ctxt:  an XML parser context
7006  *
7007  * parse ENTITY references declarations
7008  *
7009  * [68] EntityRef ::= '&' Name ';'
7010  *
7011  * [ WFC: Entity Declared ]
7012  * In a document without any DTD, a document with only an internal DTD
7013  * subset which contains no parameter entity references, or a document
7014  * with "standalone='yes'", the Name given in the entity reference
7015  * must match that in an entity declaration, except that well-formed
7016  * documents need not declare any of the following entities: amp, lt,
7017  * gt, apos, quot.  The declaration of a parameter entity must precede
7018  * any reference to it.  Similarly, the declaration of a general entity
7019  * must precede any reference to it which appears in a default value in an
7020  * attribute-list declaration. Note that if entities are declared in the
7021  * external subset or in external parameter entities, a non-validating
7022  * processor is not obligated to read and process their declarations;
7023  * for such documents, the rule that an entity must be declared is a
7024  * well-formedness constraint only if standalone='yes'.
7025  *
7026  * [ WFC: Parsed Entity ]
7027  * An entity reference must not contain the name of an unparsed entity
7028  *
7029  * Returns the xmlEntityPtr if found, or NULL otherwise.
7030  */
7031 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7032 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7033     const xmlChar *name;
7034     xmlEntityPtr ent = NULL;
7035 
7036     GROW;
7037 
7038     if (RAW != '&')
7039         return(NULL);
7040     NEXT;
7041     name = xmlParseName(ctxt);
7042     if (name == NULL) {
7043 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7044 		       "xmlParseEntityRef: no name\n");
7045         return(NULL);
7046     }
7047     if (RAW != ';') {
7048 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7049 	return(NULL);
7050     }
7051     NEXT;
7052 
7053     /*
7054      * Predefined entites override any extra definition
7055      */
7056     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7057         ent = xmlGetPredefinedEntity(name);
7058         if (ent != NULL)
7059             return(ent);
7060     }
7061 
7062     /*
7063      * Increate the number of entity references parsed
7064      */
7065     ctxt->nbentities++;
7066 
7067     /*
7068      * Ask first SAX for entity resolution, otherwise try the
7069      * entities which may have stored in the parser context.
7070      */
7071     if (ctxt->sax != NULL) {
7072 	if (ctxt->sax->getEntity != NULL)
7073 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7074 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7075 	    (ctxt->options & XML_PARSE_OLDSAX))
7076 	    ent = xmlGetPredefinedEntity(name);
7077 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7078 	    (ctxt->userData==ctxt)) {
7079 	    ent = xmlSAX2GetEntity(ctxt, name);
7080 	}
7081     }
7082     /*
7083      * [ WFC: Entity Declared ]
7084      * In a document without any DTD, a document with only an
7085      * internal DTD subset which contains no parameter entity
7086      * references, or a document with "standalone='yes'", the
7087      * Name given in the entity reference must match that in an
7088      * entity declaration, except that well-formed documents
7089      * need not declare any of the following entities: amp, lt,
7090      * gt, apos, quot.
7091      * The declaration of a parameter entity must precede any
7092      * reference to it.
7093      * Similarly, the declaration of a general entity must
7094      * precede any reference to it which appears in a default
7095      * value in an attribute-list declaration. Note that if
7096      * entities are declared in the external subset or in
7097      * external parameter entities, a non-validating processor
7098      * is not obligated to read and process their declarations;
7099      * for such documents, the rule that an entity must be
7100      * declared is a well-formedness constraint only if
7101      * standalone='yes'.
7102      */
7103     if (ent == NULL) {
7104 	if ((ctxt->standalone == 1) ||
7105 	    ((ctxt->hasExternalSubset == 0) &&
7106 	     (ctxt->hasPErefs == 0))) {
7107 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7108 		     "Entity '%s' not defined\n", name);
7109 	} else {
7110 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7111 		     "Entity '%s' not defined\n", name);
7112 	    if ((ctxt->inSubset == 0) &&
7113 		(ctxt->sax != NULL) &&
7114 		(ctxt->sax->reference != NULL)) {
7115 		ctxt->sax->reference(ctxt->userData, name);
7116 	    }
7117 	}
7118 	ctxt->valid = 0;
7119     }
7120 
7121     /*
7122      * [ WFC: Parsed Entity ]
7123      * An entity reference must not contain the name of an
7124      * unparsed entity
7125      */
7126     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7127 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7128 		 "Entity reference to unparsed entity %s\n", name);
7129     }
7130 
7131     /*
7132      * [ WFC: No External Entity References ]
7133      * Attribute values cannot contain direct or indirect
7134      * entity references to external entities.
7135      */
7136     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7137 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7138 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7139 	     "Attribute references external entity '%s'\n", name);
7140     }
7141     /*
7142      * [ WFC: No < in Attribute Values ]
7143      * The replacement text of any entity referred to directly or
7144      * indirectly in an attribute value (other than "&lt;") must
7145      * not contain a <.
7146      */
7147     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7148 	     (ent != NULL) && (ent->content != NULL) &&
7149 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7150 	     (xmlStrchr(ent->content, '<'))) {
7151 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7152     "'<' in entity '%s' is not allowed in attributes values\n", name);
7153     }
7154 
7155     /*
7156      * Internal check, no parameter entities here ...
7157      */
7158     else {
7159 	switch (ent->etype) {
7160 	    case XML_INTERNAL_PARAMETER_ENTITY:
7161 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7162 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7163 	     "Attempt to reference the parameter entity '%s'\n",
7164 			      name);
7165 	    break;
7166 	    default:
7167 	    break;
7168 	}
7169     }
7170 
7171     /*
7172      * [ WFC: No Recursion ]
7173      * A parsed entity must not contain a recursive reference
7174      * to itself, either directly or indirectly.
7175      * Done somewhere else
7176      */
7177     return(ent);
7178 }
7179 
7180 /**
7181  * xmlParseStringEntityRef:
7182  * @ctxt:  an XML parser context
7183  * @str:  a pointer to an index in the string
7184  *
7185  * parse ENTITY references declarations, but this version parses it from
7186  * a string value.
7187  *
7188  * [68] EntityRef ::= '&' Name ';'
7189  *
7190  * [ WFC: Entity Declared ]
7191  * In a document without any DTD, a document with only an internal DTD
7192  * subset which contains no parameter entity references, or a document
7193  * with "standalone='yes'", the Name given in the entity reference
7194  * must match that in an entity declaration, except that well-formed
7195  * documents need not declare any of the following entities: amp, lt,
7196  * gt, apos, quot.  The declaration of a parameter entity must precede
7197  * any reference to it.  Similarly, the declaration of a general entity
7198  * must precede any reference to it which appears in a default value in an
7199  * attribute-list declaration. Note that if entities are declared in the
7200  * external subset or in external parameter entities, a non-validating
7201  * processor is not obligated to read and process their declarations;
7202  * for such documents, the rule that an entity must be declared is a
7203  * well-formedness constraint only if standalone='yes'.
7204  *
7205  * [ WFC: Parsed Entity ]
7206  * An entity reference must not contain the name of an unparsed entity
7207  *
7208  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7209  * is updated to the current location in the string.
7210  */
7211 xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7212 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7213     xmlChar *name;
7214     const xmlChar *ptr;
7215     xmlChar cur;
7216     xmlEntityPtr ent = NULL;
7217 
7218     if ((str == NULL) || (*str == NULL))
7219         return(NULL);
7220     ptr = *str;
7221     cur = *ptr;
7222     if (cur != '&')
7223 	return(NULL);
7224 
7225     ptr++;
7226     cur = *ptr;
7227     name = xmlParseStringName(ctxt, &ptr);
7228     if (name == NULL) {
7229 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7230 		       "xmlParseStringEntityRef: no name\n");
7231 	*str = ptr;
7232 	return(NULL);
7233     }
7234     if (*ptr != ';') {
7235 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7236         xmlFree(name);
7237 	*str = ptr;
7238 	return(NULL);
7239     }
7240     ptr++;
7241 
7242 
7243     /*
7244      * Predefined entites override any extra definition
7245      */
7246     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7247         ent = xmlGetPredefinedEntity(name);
7248         if (ent != NULL) {
7249             xmlFree(name);
7250             *str = ptr;
7251             return(ent);
7252         }
7253     }
7254 
7255     /*
7256      * Increate the number of entity references parsed
7257      */
7258     ctxt->nbentities++;
7259 
7260     /*
7261      * Ask first SAX for entity resolution, otherwise try the
7262      * entities which may have stored in the parser context.
7263      */
7264     if (ctxt->sax != NULL) {
7265 	if (ctxt->sax->getEntity != NULL)
7266 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7267 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7268 	    ent = xmlGetPredefinedEntity(name);
7269 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7270 	    ent = xmlSAX2GetEntity(ctxt, name);
7271 	}
7272     }
7273 
7274     /*
7275      * [ WFC: Entity Declared ]
7276      * In a document without any DTD, a document with only an
7277      * internal DTD subset which contains no parameter entity
7278      * references, or a document with "standalone='yes'", the
7279      * Name given in the entity reference must match that in an
7280      * entity declaration, except that well-formed documents
7281      * need not declare any of the following entities: amp, lt,
7282      * gt, apos, quot.
7283      * The declaration of a parameter entity must precede any
7284      * reference to it.
7285      * Similarly, the declaration of a general entity must
7286      * precede any reference to it which appears in a default
7287      * value in an attribute-list declaration. Note that if
7288      * entities are declared in the external subset or in
7289      * external parameter entities, a non-validating processor
7290      * is not obligated to read and process their declarations;
7291      * for such documents, the rule that an entity must be
7292      * declared is a well-formedness constraint only if
7293      * standalone='yes'.
7294      */
7295     if (ent == NULL) {
7296 	if ((ctxt->standalone == 1) ||
7297 	    ((ctxt->hasExternalSubset == 0) &&
7298 	     (ctxt->hasPErefs == 0))) {
7299 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7300 		     "Entity '%s' not defined\n", name);
7301 	} else {
7302 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7303 			  "Entity '%s' not defined\n",
7304 			  name);
7305 	}
7306 	/* TODO ? check regressions ctxt->valid = 0; */
7307     }
7308 
7309     /*
7310      * [ WFC: Parsed Entity ]
7311      * An entity reference must not contain the name of an
7312      * unparsed entity
7313      */
7314     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7315 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7316 		 "Entity reference to unparsed entity %s\n", name);
7317     }
7318 
7319     /*
7320      * [ WFC: No External Entity References ]
7321      * Attribute values cannot contain direct or indirect
7322      * entity references to external entities.
7323      */
7324     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7325 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7326 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7327 	 "Attribute references external entity '%s'\n", name);
7328     }
7329     /*
7330      * [ WFC: No < in Attribute Values ]
7331      * The replacement text of any entity referred to directly or
7332      * indirectly in an attribute value (other than "&lt;") must
7333      * not contain a <.
7334      */
7335     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7336 	     (ent != NULL) && (ent->content != NULL) &&
7337 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7338 	     (xmlStrchr(ent->content, '<'))) {
7339 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7340      "'<' in entity '%s' is not allowed in attributes values\n",
7341 			  name);
7342     }
7343 
7344     /*
7345      * Internal check, no parameter entities here ...
7346      */
7347     else {
7348 	switch (ent->etype) {
7349 	    case XML_INTERNAL_PARAMETER_ENTITY:
7350 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7351 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7352 	     "Attempt to reference the parameter entity '%s'\n",
7353 				  name);
7354 	    break;
7355 	    default:
7356 	    break;
7357 	}
7358     }
7359 
7360     /*
7361      * [ WFC: No Recursion ]
7362      * A parsed entity must not contain a recursive reference
7363      * to itself, either directly or indirectly.
7364      * Done somewhere else
7365      */
7366 
7367     xmlFree(name);
7368     *str = ptr;
7369     return(ent);
7370 }
7371 
7372 /**
7373  * xmlParsePEReference:
7374  * @ctxt:  an XML parser context
7375  *
7376  * parse PEReference declarations
7377  * The entity content is handled directly by pushing it's content as
7378  * a new input stream.
7379  *
7380  * [69] PEReference ::= '%' Name ';'
7381  *
7382  * [ WFC: No Recursion ]
7383  * A parsed entity must not contain a recursive
7384  * reference to itself, either directly or indirectly.
7385  *
7386  * [ WFC: Entity Declared ]
7387  * In a document without any DTD, a document with only an internal DTD
7388  * subset which contains no parameter entity references, or a document
7389  * with "standalone='yes'", ...  ... The declaration of a parameter
7390  * entity must precede any reference to it...
7391  *
7392  * [ VC: Entity Declared ]
7393  * In a document with an external subset or external parameter entities
7394  * with "standalone='no'", ...  ... The declaration of a parameter entity
7395  * must precede any reference to it...
7396  *
7397  * [ WFC: In DTD ]
7398  * Parameter-entity references may only appear in the DTD.
7399  * NOTE: misleading but this is handled.
7400  */
7401 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7402 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7403 {
7404     const xmlChar *name;
7405     xmlEntityPtr entity = NULL;
7406     xmlParserInputPtr input;
7407 
7408     if (RAW != '%')
7409         return;
7410     NEXT;
7411     name = xmlParseName(ctxt);
7412     if (name == NULL) {
7413 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7414 		       "xmlParsePEReference: no name\n");
7415 	return;
7416     }
7417     if (RAW != ';') {
7418 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7419         return;
7420     }
7421 
7422     NEXT;
7423 
7424     /*
7425      * Increate the number of entity references parsed
7426      */
7427     ctxt->nbentities++;
7428 
7429     /*
7430      * Request the entity from SAX
7431      */
7432     if ((ctxt->sax != NULL) &&
7433 	(ctxt->sax->getParameterEntity != NULL))
7434 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7435 					       name);
7436     if (entity == NULL) {
7437 	/*
7438 	 * [ WFC: Entity Declared ]
7439 	 * In a document without any DTD, a document with only an
7440 	 * internal DTD subset which contains no parameter entity
7441 	 * references, or a document with "standalone='yes'", ...
7442 	 * ... The declaration of a parameter entity must precede
7443 	 * any reference to it...
7444 	 */
7445 	if ((ctxt->standalone == 1) ||
7446 	    ((ctxt->hasExternalSubset == 0) &&
7447 	     (ctxt->hasPErefs == 0))) {
7448 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7449 			      "PEReference: %%%s; not found\n",
7450 			      name);
7451 	} else {
7452 	    /*
7453 	     * [ VC: Entity Declared ]
7454 	     * In a document with an external subset or external
7455 	     * parameter entities with "standalone='no'", ...
7456 	     * ... The declaration of a parameter entity must
7457 	     * precede any reference to it...
7458 	     */
7459 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7460 			  "PEReference: %%%s; not found\n",
7461 			  name, NULL);
7462 	    ctxt->valid = 0;
7463 	}
7464     } else {
7465 	/*
7466 	 * Internal checking in case the entity quest barfed
7467 	 */
7468 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7469 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7470 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7471 		  "Internal: %%%s; is not a parameter entity\n",
7472 			  name, NULL);
7473 	} else if (ctxt->input->free != deallocblankswrapper) {
7474 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7475 	    if (xmlPushInput(ctxt, input) < 0)
7476 		return;
7477 	} else {
7478 	    /*
7479 	     * TODO !!!
7480 	     * handle the extra spaces added before and after
7481 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
7482 	     */
7483 	    input = xmlNewEntityInputStream(ctxt, entity);
7484 	    if (xmlPushInput(ctxt, input) < 0)
7485 		return;
7486 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7487 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7488 		(IS_BLANK_CH(NXT(5)))) {
7489 		xmlParseTextDecl(ctxt);
7490 		if (ctxt->errNo ==
7491 		    XML_ERR_UNSUPPORTED_ENCODING) {
7492 		    /*
7493 		     * The XML REC instructs us to stop parsing
7494 		     * right here
7495 		     */
7496 		    ctxt->instate = XML_PARSER_EOF;
7497 		    return;
7498 		}
7499 	    }
7500 	}
7501     }
7502     ctxt->hasPErefs = 1;
7503 }
7504 
7505 /**
7506  * xmlLoadEntityContent:
7507  * @ctxt:  an XML parser context
7508  * @entity: an unloaded system entity
7509  *
7510  * Load the original content of the given system entity from the
7511  * ExternalID/SystemID given. This is to be used for Included in Literal
7512  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7513  *
7514  * Returns 0 in case of success and -1 in case of failure
7515  */
7516 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7517 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7518     xmlParserInputPtr input;
7519     xmlBufferPtr buf;
7520     int l, c;
7521     int count = 0;
7522 
7523     if ((ctxt == NULL) || (entity == NULL) ||
7524         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7525 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7526 	(entity->content != NULL)) {
7527 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7528 	            "xmlLoadEntityContent parameter error");
7529         return(-1);
7530     }
7531 
7532     if (xmlParserDebugEntities)
7533 	xmlGenericError(xmlGenericErrorContext,
7534 		"Reading %s entity content input\n", entity->name);
7535 
7536     buf = xmlBufferCreate();
7537     if (buf == NULL) {
7538 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7539 	            "xmlLoadEntityContent parameter error");
7540         return(-1);
7541     }
7542 
7543     input = xmlNewEntityInputStream(ctxt, entity);
7544     if (input == NULL) {
7545 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7546 	            "xmlLoadEntityContent input error");
7547 	xmlBufferFree(buf);
7548         return(-1);
7549     }
7550 
7551     /*
7552      * Push the entity as the current input, read char by char
7553      * saving to the buffer until the end of the entity or an error
7554      */
7555     if (xmlPushInput(ctxt, input) < 0) {
7556         xmlBufferFree(buf);
7557 	return(-1);
7558     }
7559 
7560     GROW;
7561     c = CUR_CHAR(l);
7562     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7563            (IS_CHAR(c))) {
7564         xmlBufferAdd(buf, ctxt->input->cur, l);
7565 	if (count++ > 100) {
7566 	    count = 0;
7567 	    GROW;
7568 	}
7569 	NEXTL(l);
7570 	c = CUR_CHAR(l);
7571     }
7572 
7573     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7574         xmlPopInput(ctxt);
7575     } else if (!IS_CHAR(c)) {
7576         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7577                           "xmlLoadEntityContent: invalid char value %d\n",
7578 	                  c);
7579 	xmlBufferFree(buf);
7580 	return(-1);
7581     }
7582     entity->content = buf->content;
7583     buf->content = NULL;
7584     xmlBufferFree(buf);
7585 
7586     return(0);
7587 }
7588 
7589 /**
7590  * xmlParseStringPEReference:
7591  * @ctxt:  an XML parser context
7592  * @str:  a pointer to an index in the string
7593  *
7594  * parse PEReference declarations
7595  *
7596  * [69] PEReference ::= '%' Name ';'
7597  *
7598  * [ WFC: No Recursion ]
7599  * A parsed entity must not contain a recursive
7600  * reference to itself, either directly or indirectly.
7601  *
7602  * [ WFC: Entity Declared ]
7603  * In a document without any DTD, a document with only an internal DTD
7604  * subset which contains no parameter entity references, or a document
7605  * with "standalone='yes'", ...  ... The declaration of a parameter
7606  * entity must precede any reference to it...
7607  *
7608  * [ VC: Entity Declared ]
7609  * In a document with an external subset or external parameter entities
7610  * with "standalone='no'", ...  ... The declaration of a parameter entity
7611  * must precede any reference to it...
7612  *
7613  * [ WFC: In DTD ]
7614  * Parameter-entity references may only appear in the DTD.
7615  * NOTE: misleading but this is handled.
7616  *
7617  * Returns the string of the entity content.
7618  *         str is updated to the current value of the index
7619  */
7620 xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7621 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7622     const xmlChar *ptr;
7623     xmlChar cur;
7624     xmlChar *name;
7625     xmlEntityPtr entity = NULL;
7626 
7627     if ((str == NULL) || (*str == NULL)) return(NULL);
7628     ptr = *str;
7629     cur = *ptr;
7630     if (cur != '%')
7631         return(NULL);
7632     ptr++;
7633     cur = *ptr;
7634     name = xmlParseStringName(ctxt, &ptr);
7635     if (name == NULL) {
7636 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7637 		       "xmlParseStringPEReference: no name\n");
7638 	*str = ptr;
7639 	return(NULL);
7640     }
7641     cur = *ptr;
7642     if (cur != ';') {
7643 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7644 	xmlFree(name);
7645 	*str = ptr;
7646 	return(NULL);
7647     }
7648     ptr++;
7649 
7650     /*
7651      * Increate the number of entity references parsed
7652      */
7653     ctxt->nbentities++;
7654 
7655     /*
7656      * Request the entity from SAX
7657      */
7658     if ((ctxt->sax != NULL) &&
7659 	(ctxt->sax->getParameterEntity != NULL))
7660 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7661 					       name);
7662     if (entity == NULL) {
7663 	/*
7664 	 * [ WFC: Entity Declared ]
7665 	 * In a document without any DTD, a document with only an
7666 	 * internal DTD subset which contains no parameter entity
7667 	 * references, or a document with "standalone='yes'", ...
7668 	 * ... The declaration of a parameter entity must precede
7669 	 * any reference to it...
7670 	 */
7671 	if ((ctxt->standalone == 1) ||
7672 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7673 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7674 		 "PEReference: %%%s; not found\n", name);
7675 	} else {
7676 	    /*
7677 	     * [ VC: Entity Declared ]
7678 	     * In a document with an external subset or external
7679 	     * parameter entities with "standalone='no'", ...
7680 	     * ... The declaration of a parameter entity must
7681 	     * precede any reference to it...
7682 	     */
7683 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 			  "PEReference: %%%s; not found\n",
7685 			  name, NULL);
7686 	    ctxt->valid = 0;
7687 	}
7688     } else {
7689 	/*
7690 	 * Internal checking in case the entity quest barfed
7691 	 */
7692 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7693 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7694 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 			  "%%%s; is not a parameter entity\n",
7696 			  name, NULL);
7697 	}
7698     }
7699     ctxt->hasPErefs = 1;
7700     xmlFree(name);
7701     *str = ptr;
7702     return(entity);
7703 }
7704 
7705 /**
7706  * xmlParseDocTypeDecl:
7707  * @ctxt:  an XML parser context
7708  *
7709  * parse a DOCTYPE declaration
7710  *
7711  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7712  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7713  *
7714  * [ VC: Root Element Type ]
7715  * The Name in the document type declaration must match the element
7716  * type of the root element.
7717  */
7718 
7719 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7720 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7721     const xmlChar *name = NULL;
7722     xmlChar *ExternalID = NULL;
7723     xmlChar *URI = NULL;
7724 
7725     /*
7726      * We know that '<!DOCTYPE' has been detected.
7727      */
7728     SKIP(9);
7729 
7730     SKIP_BLANKS;
7731 
7732     /*
7733      * Parse the DOCTYPE name.
7734      */
7735     name = xmlParseName(ctxt);
7736     if (name == NULL) {
7737 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7738 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7739     }
7740     ctxt->intSubName = name;
7741 
7742     SKIP_BLANKS;
7743 
7744     /*
7745      * Check for SystemID and ExternalID
7746      */
7747     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7748 
7749     if ((URI != NULL) || (ExternalID != NULL)) {
7750         ctxt->hasExternalSubset = 1;
7751     }
7752     ctxt->extSubURI = URI;
7753     ctxt->extSubSystem = ExternalID;
7754 
7755     SKIP_BLANKS;
7756 
7757     /*
7758      * Create and update the internal subset.
7759      */
7760     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7761 	(!ctxt->disableSAX))
7762 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7763 
7764     /*
7765      * Is there any internal subset declarations ?
7766      * they are handled separately in xmlParseInternalSubset()
7767      */
7768     if (RAW == '[')
7769 	return;
7770 
7771     /*
7772      * We should be at the end of the DOCTYPE declaration.
7773      */
7774     if (RAW != '>') {
7775 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7776     }
7777     NEXT;
7778 }
7779 
7780 /**
7781  * xmlParseInternalSubset:
7782  * @ctxt:  an XML parser context
7783  *
7784  * parse the internal subset declaration
7785  *
7786  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7787  */
7788 
7789 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)7790 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7791     /*
7792      * Is there any DTD definition ?
7793      */
7794     if (RAW == '[') {
7795         ctxt->instate = XML_PARSER_DTD;
7796         NEXT;
7797 	/*
7798 	 * Parse the succession of Markup declarations and
7799 	 * PEReferences.
7800 	 * Subsequence (markupdecl | PEReference | S)*
7801 	 */
7802 	while (RAW != ']') {
7803 	    const xmlChar *check = CUR_PTR;
7804 	    unsigned int cons = ctxt->input->consumed;
7805 
7806 	    SKIP_BLANKS;
7807 	    xmlParseMarkupDecl(ctxt);
7808 	    xmlParsePEReference(ctxt);
7809 
7810 	    /*
7811 	     * Pop-up of finished entities.
7812 	     */
7813 	    while ((RAW == 0) && (ctxt->inputNr > 1))
7814 		xmlPopInput(ctxt);
7815 
7816 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7817 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7818 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
7819 		break;
7820 	    }
7821 	}
7822 	if (RAW == ']') {
7823 	    NEXT;
7824 	    SKIP_BLANKS;
7825 	}
7826     }
7827 
7828     /*
7829      * We should be at the end of the DOCTYPE declaration.
7830      */
7831     if (RAW != '>') {
7832 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7833     }
7834     NEXT;
7835 }
7836 
7837 #ifdef LIBXML_SAX1_ENABLED
7838 /**
7839  * xmlParseAttribute:
7840  * @ctxt:  an XML parser context
7841  * @value:  a xmlChar ** used to store the value of the attribute
7842  *
7843  * parse an attribute
7844  *
7845  * [41] Attribute ::= Name Eq AttValue
7846  *
7847  * [ WFC: No External Entity References ]
7848  * Attribute values cannot contain direct or indirect entity references
7849  * to external entities.
7850  *
7851  * [ WFC: No < in Attribute Values ]
7852  * The replacement text of any entity referred to directly or indirectly in
7853  * an attribute value (other than "&lt;") must not contain a <.
7854  *
7855  * [ VC: Attribute Value Type ]
7856  * The attribute must have been declared; the value must be of the type
7857  * declared for it.
7858  *
7859  * [25] Eq ::= S? '=' S?
7860  *
7861  * With namespace:
7862  *
7863  * [NS 11] Attribute ::= QName Eq AttValue
7864  *
7865  * Also the case QName == xmlns:??? is handled independently as a namespace
7866  * definition.
7867  *
7868  * Returns the attribute name, and the value in *value.
7869  */
7870 
7871 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)7872 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7873     const xmlChar *name;
7874     xmlChar *val;
7875 
7876     *value = NULL;
7877     GROW;
7878     name = xmlParseName(ctxt);
7879     if (name == NULL) {
7880 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7881 	               "error parsing attribute name\n");
7882         return(NULL);
7883     }
7884 
7885     /*
7886      * read the value
7887      */
7888     SKIP_BLANKS;
7889     if (RAW == '=') {
7890         NEXT;
7891 	SKIP_BLANKS;
7892 	val = xmlParseAttValue(ctxt);
7893 	ctxt->instate = XML_PARSER_CONTENT;
7894     } else {
7895 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7896 	       "Specification mandate value for attribute %s\n", name);
7897 	return(NULL);
7898     }
7899 
7900     /*
7901      * Check that xml:lang conforms to the specification
7902      * No more registered as an error, just generate a warning now
7903      * since this was deprecated in XML second edition
7904      */
7905     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7906 	if (!xmlCheckLanguageID(val)) {
7907 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7908 		          "Malformed value for xml:lang : %s\n",
7909 			  val, NULL);
7910 	}
7911     }
7912 
7913     /*
7914      * Check that xml:space conforms to the specification
7915      */
7916     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7917 	if (xmlStrEqual(val, BAD_CAST "default"))
7918 	    *(ctxt->space) = 0;
7919 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
7920 	    *(ctxt->space) = 1;
7921 	else {
7922 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7923 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7924                                  val, NULL);
7925 	}
7926     }
7927 
7928     *value = val;
7929     return(name);
7930 }
7931 
7932 /**
7933  * xmlParseStartTag:
7934  * @ctxt:  an XML parser context
7935  *
7936  * parse a start of tag either for rule element or
7937  * EmptyElement. In both case we don't parse the tag closing chars.
7938  *
7939  * [40] STag ::= '<' Name (S Attribute)* S? '>'
7940  *
7941  * [ WFC: Unique Att Spec ]
7942  * No attribute name may appear more than once in the same start-tag or
7943  * empty-element tag.
7944  *
7945  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7946  *
7947  * [ WFC: Unique Att Spec ]
7948  * No attribute name may appear more than once in the same start-tag or
7949  * empty-element tag.
7950  *
7951  * With namespace:
7952  *
7953  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7954  *
7955  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7956  *
7957  * Returns the element name parsed
7958  */
7959 
7960 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)7961 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7962     const xmlChar *name;
7963     const xmlChar *attname;
7964     xmlChar *attvalue;
7965     const xmlChar **atts = ctxt->atts;
7966     int nbatts = 0;
7967     int maxatts = ctxt->maxatts;
7968     int i;
7969 
7970     if (RAW != '<') return(NULL);
7971     NEXT1;
7972 
7973     name = xmlParseName(ctxt);
7974     if (name == NULL) {
7975 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7976 	     "xmlParseStartTag: invalid element name\n");
7977         return(NULL);
7978     }
7979 
7980     /*
7981      * Now parse the attributes, it ends up with the ending
7982      *
7983      * (S Attribute)* S?
7984      */
7985     SKIP_BLANKS;
7986     GROW;
7987 
7988     while ((RAW != '>') &&
7989 	   ((RAW != '/') || (NXT(1) != '>')) &&
7990 	   (IS_BYTE_CHAR(RAW))) {
7991 	const xmlChar *q = CUR_PTR;
7992 	unsigned int cons = ctxt->input->consumed;
7993 
7994 	attname = xmlParseAttribute(ctxt, &attvalue);
7995         if ((attname != NULL) && (attvalue != NULL)) {
7996 	    /*
7997 	     * [ WFC: Unique Att Spec ]
7998 	     * No attribute name may appear more than once in the same
7999 	     * start-tag or empty-element tag.
8000 	     */
8001 	    for (i = 0; i < nbatts;i += 2) {
8002 	        if (xmlStrEqual(atts[i], attname)) {
8003 		    xmlErrAttributeDup(ctxt, NULL, attname);
8004 		    xmlFree(attvalue);
8005 		    goto failed;
8006 		}
8007 	    }
8008 	    /*
8009 	     * Add the pair to atts
8010 	     */
8011 	    if (atts == NULL) {
8012 	        maxatts = 22; /* allow for 10 attrs by default */
8013 	        atts = (const xmlChar **)
8014 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8015 		if (atts == NULL) {
8016 		    xmlErrMemory(ctxt, NULL);
8017 		    if (attvalue != NULL)
8018 			xmlFree(attvalue);
8019 		    goto failed;
8020 		}
8021 		ctxt->atts = atts;
8022 		ctxt->maxatts = maxatts;
8023 	    } else if (nbatts + 4 > maxatts) {
8024 	        const xmlChar **n;
8025 
8026 	        maxatts *= 2;
8027 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8028 					     maxatts * sizeof(const xmlChar *));
8029 		if (n == NULL) {
8030 		    xmlErrMemory(ctxt, NULL);
8031 		    if (attvalue != NULL)
8032 			xmlFree(attvalue);
8033 		    goto failed;
8034 		}
8035 		atts = n;
8036 		ctxt->atts = atts;
8037 		ctxt->maxatts = maxatts;
8038 	    }
8039 	    atts[nbatts++] = attname;
8040 	    atts[nbatts++] = attvalue;
8041 	    atts[nbatts] = NULL;
8042 	    atts[nbatts + 1] = NULL;
8043 	} else {
8044 	    if (attvalue != NULL)
8045 		xmlFree(attvalue);
8046 	}
8047 
8048 failed:
8049 
8050 	GROW
8051 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8052 	    break;
8053 	if (!IS_BLANK_CH(RAW)) {
8054 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8055 			   "attributes construct error\n");
8056 	}
8057 	SKIP_BLANKS;
8058         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8059             (attname == NULL) && (attvalue == NULL)) {
8060 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8061 			   "xmlParseStartTag: problem parsing attributes\n");
8062 	    break;
8063 	}
8064 	SHRINK;
8065         GROW;
8066     }
8067 
8068     /*
8069      * SAX: Start of Element !
8070      */
8071     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8072 	(!ctxt->disableSAX)) {
8073 	if (nbatts > 0)
8074 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8075 	else
8076 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8077     }
8078 
8079     if (atts != NULL) {
8080         /* Free only the content strings */
8081         for (i = 1;i < nbatts;i+=2)
8082 	    if (atts[i] != NULL)
8083 	       xmlFree((xmlChar *) atts[i]);
8084     }
8085     return(name);
8086 }
8087 
8088 /**
8089  * xmlParseEndTag1:
8090  * @ctxt:  an XML parser context
8091  * @line:  line of the start tag
8092  * @nsNr:  number of namespaces on the start tag
8093  *
8094  * parse an end of tag
8095  *
8096  * [42] ETag ::= '</' Name S? '>'
8097  *
8098  * With namespace
8099  *
8100  * [NS 9] ETag ::= '</' QName S? '>'
8101  */
8102 
8103 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8104 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8105     const xmlChar *name;
8106 
8107     GROW;
8108     if ((RAW != '<') || (NXT(1) != '/')) {
8109 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8110 		       "xmlParseEndTag: '</' not found\n");
8111 	return;
8112     }
8113     SKIP(2);
8114 
8115     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8116 
8117     /*
8118      * We should definitely be at the ending "S? '>'" part
8119      */
8120     GROW;
8121     SKIP_BLANKS;
8122     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8123 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8124     } else
8125 	NEXT1;
8126 
8127     /*
8128      * [ WFC: Element Type Match ]
8129      * The Name in an element's end-tag must match the element type in the
8130      * start-tag.
8131      *
8132      */
8133     if (name != (xmlChar*)1) {
8134         if (name == NULL) name = BAD_CAST "unparseable";
8135         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8136 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8137 		                ctxt->name, line, name);
8138     }
8139 
8140     /*
8141      * SAX: End of Tag
8142      */
8143     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8144 	(!ctxt->disableSAX))
8145         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8146 
8147     namePop(ctxt);
8148     spacePop(ctxt);
8149     return;
8150 }
8151 
8152 /**
8153  * xmlParseEndTag:
8154  * @ctxt:  an XML parser context
8155  *
8156  * parse an end of tag
8157  *
8158  * [42] ETag ::= '</' Name S? '>'
8159  *
8160  * With namespace
8161  *
8162  * [NS 9] ETag ::= '</' QName S? '>'
8163  */
8164 
8165 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8166 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8167     xmlParseEndTag1(ctxt, 0);
8168 }
8169 #endif /* LIBXML_SAX1_ENABLED */
8170 
8171 /************************************************************************
8172  *									*
8173  *		      SAX 2 specific operations				*
8174  *									*
8175  ************************************************************************/
8176 
8177 /*
8178  * xmlGetNamespace:
8179  * @ctxt:  an XML parser context
8180  * @prefix:  the prefix to lookup
8181  *
8182  * Lookup the namespace name for the @prefix (which ca be NULL)
8183  * The prefix must come from the @ctxt->dict dictionnary
8184  *
8185  * Returns the namespace name or NULL if not bound
8186  */
8187 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8188 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8189     int i;
8190 
8191     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8192     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8193         if (ctxt->nsTab[i] == prefix) {
8194 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8195 	        return(NULL);
8196 	    return(ctxt->nsTab[i + 1]);
8197 	}
8198     if (ctxt->nsParent) return xmlGetNamespace(ctxt->nsParent, prefix);
8199     return(NULL);
8200 }
8201 
8202 /**
8203  * xmlParseQName:
8204  * @ctxt:  an XML parser context
8205  * @prefix:  pointer to store the prefix part
8206  *
8207  * parse an XML Namespace QName
8208  *
8209  * [6]  QName  ::= (Prefix ':')? LocalPart
8210  * [7]  Prefix  ::= NCName
8211  * [8]  LocalPart  ::= NCName
8212  *
8213  * Returns the Name parsed or NULL
8214  */
8215 
8216 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8217 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8218     const xmlChar *l, *p;
8219 
8220     GROW;
8221 
8222     l = xmlParseNCName(ctxt);
8223     if (l == NULL) {
8224         if (CUR == ':') {
8225 	    l = xmlParseName(ctxt);
8226 	    if (l != NULL) {
8227 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8228 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8229 		*prefix = NULL;
8230 		return(l);
8231 	    }
8232 	}
8233         return(NULL);
8234     }
8235     if (CUR == ':') {
8236         NEXT;
8237 	p = l;
8238 	l = xmlParseNCName(ctxt);
8239 	if (l == NULL) {
8240 	    xmlChar *tmp;
8241 
8242             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8243 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8244 	    l = xmlParseNmtoken(ctxt);
8245 	    if (l == NULL)
8246 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8247 	    else {
8248 		tmp = xmlBuildQName(l, p, NULL, 0);
8249 		xmlFree((char *)l);
8250 	    }
8251 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8252 	    if (tmp != NULL) xmlFree(tmp);
8253 	    *prefix = NULL;
8254 	    return(p);
8255 	}
8256 	if (CUR == ':') {
8257 	    xmlChar *tmp;
8258 
8259             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8260 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8261 	    NEXT;
8262 	    tmp = (xmlChar *) xmlParseName(ctxt);
8263 	    if (tmp != NULL) {
8264 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8265 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8266 		if (tmp != NULL) xmlFree(tmp);
8267 		*prefix = p;
8268 		return(l);
8269 	    }
8270 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8271 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8272 	    if (tmp != NULL) xmlFree(tmp);
8273 	    *prefix = p;
8274 	    return(l);
8275 	}
8276 	*prefix = p;
8277     } else
8278         *prefix = NULL;
8279     return(l);
8280 }
8281 
8282 /**
8283  * xmlParseQNameAndCompare:
8284  * @ctxt:  an XML parser context
8285  * @name:  the localname
8286  * @prefix:  the prefix, if any.
8287  *
8288  * parse an XML name and compares for match
8289  * (specialized for endtag parsing)
8290  *
8291  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8292  * and the name for mismatch
8293  */
8294 
8295 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8296 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8297                         xmlChar const *prefix) {
8298     const xmlChar *cmp = name;
8299     const xmlChar *in;
8300     const xmlChar *ret;
8301     const xmlChar *prefix2;
8302 
8303     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8304 
8305     GROW;
8306     in = ctxt->input->cur;
8307 
8308     cmp = prefix;
8309     while (*in != 0 && *in == *cmp) {
8310     	++in;
8311 	++cmp;
8312     }
8313     if ((*cmp == 0) && (*in == ':')) {
8314         in++;
8315 	cmp = name;
8316 	while (*in != 0 && *in == *cmp) {
8317 	    ++in;
8318 	    ++cmp;
8319 	}
8320 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8321 	    /* success */
8322 	    ctxt->input->cur = in;
8323 	    return((const xmlChar*) 1);
8324 	}
8325     }
8326     /*
8327      * all strings coms from the dictionary, equality can be done directly
8328      */
8329     ret = xmlParseQName (ctxt, &prefix2);
8330     if ((ret == name) && (prefix == prefix2))
8331 	return((const xmlChar*) 1);
8332     return ret;
8333 }
8334 
8335 /**
8336  * xmlParseAttValueInternal:
8337  * @ctxt:  an XML parser context
8338  * @len:  attribute len result
8339  * @alloc:  whether the attribute was reallocated as a new string
8340  * @normalize:  if 1 then further non-CDATA normalization must be done
8341  *
8342  * parse a value for an attribute.
8343  * NOTE: if no normalization is needed, the routine will return pointers
8344  *       directly from the data buffer.
8345  *
8346  * 3.3.3 Attribute-Value Normalization:
8347  * Before the value of an attribute is passed to the application or
8348  * checked for validity, the XML processor must normalize it as follows:
8349  * - a character reference is processed by appending the referenced
8350  *   character to the attribute value
8351  * - an entity reference is processed by recursively processing the
8352  *   replacement text of the entity
8353  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8354  *   appending #x20 to the normalized value, except that only a single
8355  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8356  *   parsed entity or the literal entity value of an internal parsed entity
8357  * - other characters are processed by appending them to the normalized value
8358  * If the declared value is not CDATA, then the XML processor must further
8359  * process the normalized attribute value by discarding any leading and
8360  * trailing space (#x20) characters, and by replacing sequences of space
8361  * (#x20) characters by a single space (#x20) character.
8362  * All attributes for which no declaration has been read should be treated
8363  * by a non-validating parser as if declared CDATA.
8364  *
8365  * Returns the AttValue parsed or NULL. The value has to be freed by the
8366  *     caller if it was copied, this can be detected by val[*len] == 0.
8367  */
8368 
8369 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8370 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8371                          int normalize)
8372 {
8373     xmlChar limit = 0;
8374     const xmlChar *in = NULL, *start, *end, *last;
8375     xmlChar *ret = NULL;
8376 
8377     GROW;
8378     in = (xmlChar *) CUR_PTR;
8379     if (*in != '"' && *in != '\'') {
8380         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8381         return (NULL);
8382     }
8383     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8384 
8385     /*
8386      * try to handle in this routine the most common case where no
8387      * allocation of a new string is required and where content is
8388      * pure ASCII.
8389      */
8390     limit = *in++;
8391     end = ctxt->input->end;
8392     start = in;
8393     if (in >= end) {
8394         const xmlChar *oldbase = ctxt->input->base;
8395 	GROW;
8396 	if (oldbase != ctxt->input->base) {
8397 	    long delta = ctxt->input->base - oldbase;
8398 	    start = start + delta;
8399 	    in = in + delta;
8400 	}
8401 	end = ctxt->input->end;
8402     }
8403     if (normalize) {
8404         /*
8405 	 * Skip any leading spaces
8406 	 */
8407 	while ((in < end) && (*in != limit) &&
8408 	       ((*in == 0x20) || (*in == 0x9) ||
8409 	        (*in == 0xA) || (*in == 0xD))) {
8410 	    in++;
8411 	    start = in;
8412 	    if (in >= end) {
8413 		const xmlChar *oldbase = ctxt->input->base;
8414 		GROW;
8415 		if (oldbase != ctxt->input->base) {
8416 		    long delta = ctxt->input->base - oldbase;
8417 		    start = start + delta;
8418 		    in = in + delta;
8419 		}
8420 		end = ctxt->input->end;
8421 	    }
8422 	}
8423 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8424 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8425 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8426 	    if (in >= end) {
8427 		const xmlChar *oldbase = ctxt->input->base;
8428 		GROW;
8429 		if (oldbase != ctxt->input->base) {
8430 		    long delta = ctxt->input->base - oldbase;
8431 		    start = start + delta;
8432 		    in = in + delta;
8433 		}
8434 		end = ctxt->input->end;
8435 	    }
8436 	}
8437 	last = in;
8438 	/*
8439 	 * skip the trailing blanks
8440 	 */
8441 	while ((last[-1] == 0x20) && (last > start)) last--;
8442 	while ((in < end) && (*in != limit) &&
8443 	       ((*in == 0x20) || (*in == 0x9) ||
8444 	        (*in == 0xA) || (*in == 0xD))) {
8445 	    in++;
8446 	    if (in >= end) {
8447 		const xmlChar *oldbase = ctxt->input->base;
8448 		GROW;
8449 		if (oldbase != ctxt->input->base) {
8450 		    long delta = ctxt->input->base - oldbase;
8451 		    start = start + delta;
8452 		    in = in + delta;
8453 		    last = last + delta;
8454 		}
8455 		end = ctxt->input->end;
8456 	    }
8457 	}
8458 	if (*in != limit) goto need_complex;
8459     } else {
8460 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8461 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8462 	    in++;
8463 	    if (in >= end) {
8464 		const xmlChar *oldbase = ctxt->input->base;
8465 		GROW;
8466 		if (oldbase != ctxt->input->base) {
8467 		    long delta = ctxt->input->base - oldbase;
8468 		    start = start + delta;
8469 		    in = in + delta;
8470 		}
8471 		end = ctxt->input->end;
8472 	    }
8473 	}
8474 	last = in;
8475 	if (*in != limit) goto need_complex;
8476     }
8477     in++;
8478     if (len != NULL) {
8479         *len = last - start;
8480         ret = (xmlChar *) start;
8481     } else {
8482         if (alloc) *alloc = 1;
8483         ret = xmlStrndup(start, last - start);
8484     }
8485     CUR_PTR = in;
8486     if (alloc) *alloc = 0;
8487     return ret;
8488 need_complex:
8489     if (alloc) *alloc = 1;
8490     return xmlParseAttValueComplex(ctxt, len, normalize);
8491 }
8492 
8493 /**
8494  * xmlParseAttribute2:
8495  * @ctxt:  an XML parser context
8496  * @pref:  the element prefix
8497  * @elem:  the element name
8498  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8499  * @value:  a xmlChar ** used to store the value of the attribute
8500  * @len:  an int * to save the length of the attribute
8501  * @alloc:  an int * to indicate if the attribute was allocated
8502  *
8503  * parse an attribute in the new SAX2 framework.
8504  *
8505  * Returns the attribute name, and the value in *value, .
8506  */
8507 
8508 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8509 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8510                    const xmlChar * pref, const xmlChar * elem,
8511                    const xmlChar ** prefix, xmlChar ** value,
8512                    int *len, int *alloc)
8513 {
8514     const xmlChar *name;
8515     xmlChar *val, *internal_val = NULL;
8516     int normalize = 0;
8517 
8518     *value = NULL;
8519     GROW;
8520     name = xmlParseQName(ctxt, prefix);
8521     if (name == NULL) {
8522         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8523                        "error parsing attribute name\n");
8524         return (NULL);
8525     }
8526 
8527     /*
8528      * get the type if needed
8529      */
8530     if (ctxt->attsSpecial != NULL) {
8531         int type;
8532 
8533         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8534                                             pref, elem, *prefix, name);
8535         if (type != 0)
8536             normalize = 1;
8537     }
8538 
8539     /*
8540      * read the value
8541      */
8542     SKIP_BLANKS;
8543     if (RAW == '=') {
8544         NEXT;
8545         SKIP_BLANKS;
8546         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8547 	if (normalize) {
8548 	    /*
8549 	     * Sometimes a second normalisation pass for spaces is needed
8550 	     * but that only happens if charrefs or entities refernces
8551 	     * have been used in the attribute value, i.e. the attribute
8552 	     * value have been extracted in an allocated string already.
8553 	     */
8554 	    if (*alloc) {
8555 	        const xmlChar *val2;
8556 
8557 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8558 		if ((val2 != NULL) && (val2 != val)) {
8559 		    xmlFree(val);
8560 		    val = (xmlChar *) val2;
8561 		}
8562 	    }
8563 	}
8564         ctxt->instate = XML_PARSER_CONTENT;
8565     } else {
8566         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8567                           "Specification mandate value for attribute %s\n",
8568                           name);
8569         return (NULL);
8570     }
8571 
8572     if (*prefix == ctxt->str_xml) {
8573         /*
8574          * Check that xml:lang conforms to the specification
8575          * No more registered as an error, just generate a warning now
8576          * since this was deprecated in XML second edition
8577          */
8578         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8579             internal_val = xmlStrndup(val, *len);
8580             if (!xmlCheckLanguageID(internal_val)) {
8581                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8582                               "Malformed value for xml:lang : %s\n",
8583                               internal_val, NULL);
8584             }
8585         }
8586 
8587         /*
8588          * Check that xml:space conforms to the specification
8589          */
8590         if (xmlStrEqual(name, BAD_CAST "space")) {
8591             internal_val = xmlStrndup(val, *len);
8592             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8593                 *(ctxt->space) = 0;
8594             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8595                 *(ctxt->space) = 1;
8596             else {
8597                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8598                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8599                               internal_val, NULL);
8600             }
8601         }
8602         if (internal_val) {
8603             xmlFree(internal_val);
8604         }
8605     }
8606 
8607     *value = val;
8608     return (name);
8609 }
8610 /**
8611  * xmlParseStartTag2:
8612  * @ctxt:  an XML parser context
8613  *
8614  * parse a start of tag either for rule element or
8615  * EmptyElement. In both case we don't parse the tag closing chars.
8616  * This routine is called when running SAX2 parsing
8617  *
8618  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8619  *
8620  * [ WFC: Unique Att Spec ]
8621  * No attribute name may appear more than once in the same start-tag or
8622  * empty-element tag.
8623  *
8624  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8625  *
8626  * [ WFC: Unique Att Spec ]
8627  * No attribute name may appear more than once in the same start-tag or
8628  * empty-element tag.
8629  *
8630  * With namespace:
8631  *
8632  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8633  *
8634  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8635  *
8636  * Returns the element name parsed
8637  */
8638 
8639 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8640 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8641                   const xmlChar **URI, int *tlen) {
8642     const xmlChar *localname;
8643     const xmlChar *prefix;
8644     const xmlChar *attname;
8645     const xmlChar *aprefix;
8646     const xmlChar *nsname;
8647     xmlChar *attvalue;
8648     const xmlChar **atts = ctxt->atts;
8649     int maxatts = ctxt->maxatts;
8650     int nratts, nbatts, nbdef;
8651     int i, j, nbNs, attval, oldline, oldcol;
8652     const xmlChar *base;
8653     unsigned long cur;
8654     int nsNr = ctxt->nsNr;
8655 
8656     if (RAW != '<') return(NULL);
8657     NEXT1;
8658 
8659     /*
8660      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8661      *       point since the attribute values may be stored as pointers to
8662      *       the buffer and calling SHRINK would destroy them !
8663      *       The Shrinking is only possible once the full set of attribute
8664      *       callbacks have been done.
8665      */
8666 reparse:
8667     SHRINK;
8668     base = ctxt->input->base;
8669     cur = ctxt->input->cur - ctxt->input->base;
8670     oldline = ctxt->input->line;
8671     oldcol = ctxt->input->col;
8672     nbatts = 0;
8673     nratts = 0;
8674     nbdef = 0;
8675     nbNs = 0;
8676     attval = 0;
8677     /* Forget any namespaces added during an earlier parse of this element. */
8678     ctxt->nsNr = nsNr;
8679 
8680     localname = xmlParseQName(ctxt, &prefix);
8681     if (localname == NULL) {
8682 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8683 		       "StartTag: invalid element name\n");
8684         return(NULL);
8685     }
8686     *tlen = ctxt->input->cur - ctxt->input->base - cur;
8687 
8688     /*
8689      * Now parse the attributes, it ends up with the ending
8690      *
8691      * (S Attribute)* S?
8692      */
8693     SKIP_BLANKS;
8694     GROW;
8695     if (ctxt->input->base != base) goto base_changed;
8696 
8697     while ((RAW != '>') &&
8698 	   ((RAW != '/') || (NXT(1) != '>')) &&
8699 	   (IS_BYTE_CHAR(RAW))) {
8700 	const xmlChar *q = CUR_PTR;
8701 	unsigned int cons = ctxt->input->consumed;
8702 	int len = -1, alloc = 0;
8703 
8704 	attname = xmlParseAttribute2(ctxt, prefix, localname,
8705 	                             &aprefix, &attvalue, &len, &alloc);
8706 	if (ctxt->input->base != base) {
8707 	    if ((attvalue != NULL) && (alloc != 0))
8708 	        xmlFree(attvalue);
8709 	    attvalue = NULL;
8710 	    goto base_changed;
8711 	}
8712         if ((attname != NULL) && (attvalue != NULL)) {
8713 	    if (len < 0) len = xmlStrlen(attvalue);
8714             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8715 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8716 		xmlURIPtr uri;
8717 
8718                 if (*URL != 0) {
8719 		    uri = xmlParseURI((const char *) URL);
8720 		    if (uri == NULL) {
8721 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8722 			         "xmlns: '%s' is not a valid URI\n",
8723 					   URL, NULL, NULL);
8724 		    } else {
8725 			if (uri->scheme == NULL) {
8726 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8727 				      "xmlns: URI %s is not absolute\n",
8728 				      URL, NULL, NULL);
8729 			}
8730 			xmlFreeURI(uri);
8731 		    }
8732 		    if (URL == ctxt->str_xml_ns) {
8733 			if (attname != ctxt->str_xml) {
8734 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8735 			 "xml namespace URI cannot be the default namespace\n",
8736 				     NULL, NULL, NULL);
8737 			}
8738 			goto skip_default_ns;
8739 		    }
8740 		    if ((len == 29) &&
8741 			(xmlStrEqual(URL,
8742 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8743 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8744 			     "reuse of the xmlns namespace name is forbidden\n",
8745 				 NULL, NULL, NULL);
8746 			goto skip_default_ns;
8747 		    }
8748 		}
8749 		/*
8750 		 * check that it's not a defined namespace
8751 		 */
8752 		for (j = 1;j <= nbNs;j++)
8753 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8754 			break;
8755 		if (j <= nbNs)
8756 		    xmlErrAttributeDup(ctxt, NULL, attname);
8757 		else
8758 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8759 skip_default_ns:
8760 		if (alloc != 0) xmlFree(attvalue);
8761 		SKIP_BLANKS;
8762 		continue;
8763 	    }
8764             if (aprefix == ctxt->str_xmlns) {
8765 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8766 		xmlURIPtr uri;
8767 
8768                 if (attname == ctxt->str_xml) {
8769 		    if (URL != ctxt->str_xml_ns) {
8770 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8771 			         "xml namespace prefix mapped to wrong URI\n",
8772 			         NULL, NULL, NULL);
8773 		    }
8774 		    /*
8775 		     * Do not keep a namespace definition node
8776 		     */
8777 		    goto skip_ns;
8778 		}
8779                 if (URL == ctxt->str_xml_ns) {
8780 		    if (attname != ctxt->str_xml) {
8781 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8782 			         "xml namespace URI mapped to wrong prefix\n",
8783 			         NULL, NULL, NULL);
8784 		    }
8785 		    goto skip_ns;
8786 		}
8787                 if (attname == ctxt->str_xmlns) {
8788 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8789 			     "redefinition of the xmlns prefix is forbidden\n",
8790 			     NULL, NULL, NULL);
8791 		    goto skip_ns;
8792 		}
8793 		if ((len == 29) &&
8794 		    (xmlStrEqual(URL,
8795 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8796 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8797 			     "reuse of the xmlns namespace name is forbidden\n",
8798 			     NULL, NULL, NULL);
8799 		    goto skip_ns;
8800 		}
8801 		if ((URL == NULL) || (URL[0] == 0)) {
8802 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8803 		             "xmlns:%s: Empty XML namespace is not allowed\n",
8804 			          attname, NULL, NULL);
8805 		    goto skip_ns;
8806 		} else {
8807 		    uri = xmlParseURI((const char *) URL);
8808 		    if (uri == NULL) {
8809 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8810 			     "xmlns:%s: '%s' is not a valid URI\n",
8811 					   attname, URL, NULL);
8812 		    } else {
8813 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8814 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8815 				      "xmlns:%s: URI %s is not absolute\n",
8816 				      attname, URL, NULL);
8817 			}
8818 			xmlFreeURI(uri);
8819 		    }
8820 		}
8821 
8822 		/*
8823 		 * check that it's not a defined namespace
8824 		 */
8825 		for (j = 1;j <= nbNs;j++)
8826 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8827 			break;
8828 		if (j <= nbNs)
8829 		    xmlErrAttributeDup(ctxt, aprefix, attname);
8830 		else
8831 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8832 skip_ns:
8833 		if (alloc != 0) xmlFree(attvalue);
8834 		SKIP_BLANKS;
8835 		if (ctxt->input->base != base) goto base_changed;
8836 		continue;
8837 	    }
8838 
8839 	    /*
8840 	     * Add the pair to atts
8841 	     */
8842 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8843 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8844 		    if (attvalue[len] == 0)
8845 			xmlFree(attvalue);
8846 		    goto failed;
8847 		}
8848 	        maxatts = ctxt->maxatts;
8849 		atts = ctxt->atts;
8850 	    }
8851 	    ctxt->attallocs[nratts++] = alloc;
8852 	    atts[nbatts++] = attname;
8853 	    atts[nbatts++] = aprefix;
8854 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
8855 	    atts[nbatts++] = attvalue;
8856 	    attvalue += len;
8857 	    atts[nbatts++] = attvalue;
8858 	    /*
8859 	     * tag if some deallocation is needed
8860 	     */
8861 	    if (alloc != 0) attval = 1;
8862 	} else {
8863 	    if ((attvalue != NULL) && (attvalue[len] == 0))
8864 		xmlFree(attvalue);
8865 	}
8866 
8867 failed:
8868 
8869 	GROW
8870 	if (ctxt->input->base != base) goto base_changed;
8871 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8872 	    break;
8873 	if (!IS_BLANK_CH(RAW)) {
8874 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8875 			   "attributes construct error\n");
8876 	    break;
8877 	}
8878 	SKIP_BLANKS;
8879         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8880             (attname == NULL) && (attvalue == NULL)) {
8881 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8882 	         "xmlParseStartTag: problem parsing attributes\n");
8883 	    break;
8884 	}
8885         GROW;
8886 	if (ctxt->input->base != base) goto base_changed;
8887     }
8888 
8889     /*
8890      * The attributes defaulting
8891      */
8892     if (ctxt->attsDefault != NULL) {
8893         xmlDefAttrsPtr defaults;
8894 
8895 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8896 	if (defaults != NULL) {
8897 	    for (i = 0;i < defaults->nbAttrs;i++) {
8898 	        attname = defaults->values[5 * i];
8899 		aprefix = defaults->values[5 * i + 1];
8900 
8901                 /*
8902 		 * special work for namespaces defaulted defs
8903 		 */
8904 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8905 		    /*
8906 		     * check that it's not a defined namespace
8907 		     */
8908 		    for (j = 1;j <= nbNs;j++)
8909 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8910 			    break;
8911 	            if (j <= nbNs) continue;
8912 
8913 		    nsname = xmlGetNamespace(ctxt, NULL);
8914 		    if (nsname != defaults->values[5 * i + 2]) {
8915 			if (nsPush(ctxt, NULL,
8916 			           defaults->values[5 * i + 2]) > 0)
8917 			    nbNs++;
8918 		    }
8919 		} else if (aprefix == ctxt->str_xmlns) {
8920 		    /*
8921 		     * check that it's not a defined namespace
8922 		     */
8923 		    for (j = 1;j <= nbNs;j++)
8924 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8925 			    break;
8926 	            if (j <= nbNs) continue;
8927 
8928 		    nsname = xmlGetNamespace(ctxt, attname);
8929 		    if (nsname != defaults->values[2]) {
8930 			if (nsPush(ctxt, attname,
8931 			           defaults->values[5 * i + 2]) > 0)
8932 			    nbNs++;
8933 		    }
8934 		} else {
8935 		    /*
8936 		     * check that it's not a defined attribute
8937 		     */
8938 		    for (j = 0;j < nbatts;j+=5) {
8939 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
8940 			    break;
8941 		    }
8942 		    if (j < nbatts) continue;
8943 
8944 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8945 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8946 			    return(NULL);
8947 			}
8948 			maxatts = ctxt->maxatts;
8949 			atts = ctxt->atts;
8950 		    }
8951 		    atts[nbatts++] = attname;
8952 		    atts[nbatts++] = aprefix;
8953 		    if (aprefix == NULL)
8954 			atts[nbatts++] = NULL;
8955 		    else
8956 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8957 		    atts[nbatts++] = defaults->values[5 * i + 2];
8958 		    atts[nbatts++] = defaults->values[5 * i + 3];
8959 		    if ((ctxt->standalone == 1) &&
8960 		        (defaults->values[5 * i + 4] != NULL)) {
8961 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8962 	  "standalone: attribute %s on %s defaulted from external subset\n",
8963 	                                 attname, localname);
8964 		    }
8965 		    nbdef++;
8966 		}
8967 	    }
8968 	}
8969     }
8970 
8971     /*
8972      * The attributes checkings
8973      */
8974     for (i = 0; i < nbatts;i += 5) {
8975         /*
8976 	* The default namespace does not apply to attribute names.
8977 	*/
8978 	if (atts[i + 1] != NULL) {
8979 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8980 	    if (nsname == NULL) {
8981 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8982 		    "Namespace prefix %s for %s on %s is not defined\n",
8983 		    atts[i + 1], atts[i], localname);
8984 	    }
8985 	    atts[i + 2] = nsname;
8986 	} else
8987 	    nsname = NULL;
8988 	/*
8989 	 * [ WFC: Unique Att Spec ]
8990 	 * No attribute name may appear more than once in the same
8991 	 * start-tag or empty-element tag.
8992 	 * As extended by the Namespace in XML REC.
8993 	 */
8994         for (j = 0; j < i;j += 5) {
8995 	    if (atts[i] == atts[j]) {
8996 	        if (atts[i+1] == atts[j+1]) {
8997 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8998 		    break;
8999 		}
9000 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9001 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9002 			     "Namespaced Attribute %s in '%s' redefined\n",
9003 			     atts[i], nsname, NULL);
9004 		    break;
9005 		}
9006 	    }
9007 	}
9008     }
9009 
9010     nsname = xmlGetNamespace(ctxt, prefix);
9011     if ((prefix != NULL) && (nsname == NULL)) {
9012 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9013 	         "Namespace prefix %s on %s is not defined\n",
9014 		 prefix, localname, NULL);
9015     }
9016     *pref = prefix;
9017     *URI = nsname;
9018 
9019     /*
9020      * SAX: Start of Element !
9021      */
9022     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9023 	(!ctxt->disableSAX)) {
9024 	if (nbNs > 0)
9025 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9026 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9027 			  nbatts / 5, nbdef, atts);
9028 	else
9029 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9030 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9031     }
9032 
9033     /*
9034      * Free up attribute allocated strings if needed
9035      */
9036     if (attval != 0) {
9037 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9038 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9039 	        xmlFree((xmlChar *) atts[i]);
9040     }
9041 
9042     return(localname);
9043 
9044 base_changed:
9045     /*
9046      * the attribute strings are valid iif the base didn't changed
9047      */
9048     if (attval != 0) {
9049 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9050 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9051 	        xmlFree((xmlChar *) atts[i]);
9052     }
9053     ctxt->input->cur = ctxt->input->base + cur;
9054     ctxt->input->line = oldline;
9055     ctxt->input->col = oldcol;
9056     if (ctxt->wellFormed == 1) {
9057 	goto reparse;
9058     }
9059     return(NULL);
9060 }
9061 
9062 /**
9063  * xmlParseEndTag2:
9064  * @ctxt:  an XML parser context
9065  * @line:  line of the start tag
9066  * @nsNr:  number of namespaces on the start tag
9067  *
9068  * parse an end of tag
9069  *
9070  * [42] ETag ::= '</' Name S? '>'
9071  *
9072  * With namespace
9073  *
9074  * [NS 9] ETag ::= '</' QName S? '>'
9075  */
9076 
9077 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9078 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9079                 const xmlChar *URI, int line, int nsNr, int tlen) {
9080     const xmlChar *name;
9081 
9082     GROW;
9083     if ((RAW != '<') || (NXT(1) != '/')) {
9084 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9085 	return;
9086     }
9087     SKIP(2);
9088 
9089     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9090         if (ctxt->input->cur[tlen] == '>') {
9091 	    ctxt->input->cur += tlen + 1;
9092 	    goto done;
9093 	}
9094 	ctxt->input->cur += tlen;
9095 	name = (xmlChar*)1;
9096     } else {
9097 	if (prefix == NULL)
9098 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9099 	else
9100 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9101     }
9102 
9103     /*
9104      * We should definitely be at the ending "S? '>'" part
9105      */
9106     GROW;
9107     SKIP_BLANKS;
9108     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9109 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9110     } else
9111 	NEXT1;
9112 
9113     /*
9114      * [ WFC: Element Type Match ]
9115      * The Name in an element's end-tag must match the element type in the
9116      * start-tag.
9117      *
9118      */
9119     if (name != (xmlChar*)1) {
9120         if (name == NULL) name = BAD_CAST "unparseable";
9121         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9122 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9123 		                ctxt->name, line, name);
9124     }
9125 
9126     /*
9127      * SAX: End of Tag
9128      */
9129 done:
9130     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9131 	(!ctxt->disableSAX))
9132 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9133 
9134     spacePop(ctxt);
9135     if (nsNr != 0)
9136 	nsPop(ctxt, nsNr);
9137     return;
9138 }
9139 
9140 /**
9141  * xmlParseCDSect:
9142  * @ctxt:  an XML parser context
9143  *
9144  * Parse escaped pure raw content.
9145  *
9146  * [18] CDSect ::= CDStart CData CDEnd
9147  *
9148  * [19] CDStart ::= '<![CDATA['
9149  *
9150  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9151  *
9152  * [21] CDEnd ::= ']]>'
9153  */
9154 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9155 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9156     xmlChar *buf = NULL;
9157     int len = 0;
9158     int size = XML_PARSER_BUFFER_SIZE;
9159     int r, rl;
9160     int	s, sl;
9161     int cur, l;
9162     int count = 0;
9163 
9164     /* Check 2.6.0 was NXT(0) not RAW */
9165     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9166 	SKIP(9);
9167     } else
9168         return;
9169 
9170     ctxt->instate = XML_PARSER_CDATA_SECTION;
9171     r = CUR_CHAR(rl);
9172     if (!IS_CHAR(r)) {
9173 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9174 	ctxt->instate = XML_PARSER_CONTENT;
9175         return;
9176     }
9177     NEXTL(rl);
9178     s = CUR_CHAR(sl);
9179     if (!IS_CHAR(s)) {
9180 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9181 	ctxt->instate = XML_PARSER_CONTENT;
9182         return;
9183     }
9184     NEXTL(sl);
9185     cur = CUR_CHAR(l);
9186     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9187     if (buf == NULL) {
9188 	xmlErrMemory(ctxt, NULL);
9189 	return;
9190     }
9191     while (IS_CHAR(cur) &&
9192            ((r != ']') || (s != ']') || (cur != '>'))) {
9193 	if (len + 5 >= size) {
9194 	    xmlChar *tmp;
9195 
9196 	    size *= 2;
9197 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9198 	    if (tmp == NULL) {
9199 	        xmlFree(buf);
9200 		xmlErrMemory(ctxt, NULL);
9201 		return;
9202 	    }
9203 	    buf = tmp;
9204 	}
9205 	COPY_BUF(rl,buf,len,r);
9206 	r = s;
9207 	rl = sl;
9208 	s = cur;
9209 	sl = l;
9210 	count++;
9211 	if (count > 50) {
9212 	    GROW;
9213 	    count = 0;
9214 	}
9215 	NEXTL(l);
9216 	cur = CUR_CHAR(l);
9217     }
9218     buf[len] = 0;
9219     ctxt->instate = XML_PARSER_CONTENT;
9220     if (cur != '>') {
9221 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9222 	                     "CData section not finished\n%.50s\n", buf);
9223 	xmlFree(buf);
9224         return;
9225     }
9226     NEXTL(l);
9227 
9228     /*
9229      * OK the buffer is to be consumed as cdata.
9230      */
9231     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9232 	if (ctxt->sax->cdataBlock != NULL)
9233 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9234 	else if (ctxt->sax->characters != NULL)
9235 	    ctxt->sax->characters(ctxt->userData, buf, len);
9236     }
9237     xmlFree(buf);
9238 }
9239 
9240 /**
9241  * xmlParseContent:
9242  * @ctxt:  an XML parser context
9243  *
9244  * Parse a content:
9245  *
9246  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9247  */
9248 
9249 void
xmlParseContent(xmlParserCtxtPtr ctxt)9250 xmlParseContent(xmlParserCtxtPtr ctxt) {
9251     GROW;
9252     while ((RAW != 0) &&
9253 	   ((RAW != '<') || (NXT(1) != '/')) &&
9254 	   (ctxt->instate != XML_PARSER_EOF)) {
9255 	const xmlChar *test = CUR_PTR;
9256 	unsigned int cons = ctxt->input->consumed;
9257 	const xmlChar *cur = ctxt->input->cur;
9258 
9259 	/*
9260 	 * First case : a Processing Instruction.
9261 	 */
9262 	if ((*cur == '<') && (cur[1] == '?')) {
9263 	    xmlParsePI(ctxt);
9264 	}
9265 
9266 	/*
9267 	 * Second case : a CDSection
9268 	 */
9269 	/* 2.6.0 test was *cur not RAW */
9270 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9271 	    xmlParseCDSect(ctxt);
9272 	}
9273 
9274 	/*
9275 	 * Third case :  a comment
9276 	 */
9277 	else if ((*cur == '<') && (NXT(1) == '!') &&
9278 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9279 	    xmlParseComment(ctxt);
9280 	    ctxt->instate = XML_PARSER_CONTENT;
9281 	}
9282 
9283 	/*
9284 	 * Fourth case :  a sub-element.
9285 	 */
9286 	else if (*cur == '<') {
9287 	    xmlParseElement(ctxt);
9288 	}
9289 
9290 	/*
9291 	 * Fifth case : a reference. If if has not been resolved,
9292 	 *    parsing returns it's Name, create the node
9293 	 */
9294 
9295 	else if (*cur == '&') {
9296 	    xmlParseReference(ctxt);
9297 	}
9298 
9299 	/*
9300 	 * Last case, text. Note that References are handled directly.
9301 	 */
9302 	else {
9303 	    xmlParseCharData(ctxt, 0);
9304 	}
9305 
9306 	GROW;
9307 	/*
9308 	 * Pop-up of finished entities.
9309 	 */
9310 	while ((RAW == 0) && (ctxt->inputNr > 1))
9311 	    xmlPopInput(ctxt);
9312 	SHRINK;
9313 
9314 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9315 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9316 	                "detected an error in element content\n");
9317 	    ctxt->instate = XML_PARSER_EOF;
9318             break;
9319 	}
9320     }
9321 }
9322 
9323 /**
9324  * xmlParseElement:
9325  * @ctxt:  an XML parser context
9326  *
9327  * parse an XML element, this is highly recursive
9328  *
9329  * [39] element ::= EmptyElemTag | STag content ETag
9330  *
9331  * [ WFC: Element Type Match ]
9332  * The Name in an element's end-tag must match the element type in the
9333  * start-tag.
9334  *
9335  */
9336 
9337 void
xmlParseElement(xmlParserCtxtPtr ctxt)9338 xmlParseElement(xmlParserCtxtPtr ctxt) {
9339     const xmlChar *name;
9340     const xmlChar *prefix;
9341     const xmlChar *URI;
9342     xmlParserNodeInfo node_info;
9343     int line, tlen;
9344     xmlNodePtr ret;
9345     int nsNr = ctxt->nsNr;
9346 
9347     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9348         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9349 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9350 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9351 			  xmlParserMaxDepth);
9352 	ctxt->instate = XML_PARSER_EOF;
9353 	return;
9354     }
9355 
9356     /* Capture start position */
9357     if (ctxt->record_info) {
9358         node_info.begin_pos = ctxt->input->consumed +
9359                           (CUR_PTR - ctxt->input->base);
9360 	node_info.begin_line = ctxt->input->line;
9361     }
9362 
9363     if (ctxt->spaceNr == 0)
9364 	spacePush(ctxt, -1);
9365     else if (*ctxt->space == -2)
9366 	spacePush(ctxt, -1);
9367     else
9368 	spacePush(ctxt, *ctxt->space);
9369 
9370     line = ctxt->input->line;
9371 #ifdef LIBXML_SAX1_ENABLED
9372     if (ctxt->sax2)
9373 #endif /* LIBXML_SAX1_ENABLED */
9374         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9375 #ifdef LIBXML_SAX1_ENABLED
9376     else
9377 	name = xmlParseStartTag(ctxt);
9378 #endif /* LIBXML_SAX1_ENABLED */
9379     if (name == NULL) {
9380 	spacePop(ctxt);
9381         return;
9382     }
9383     namePush(ctxt, name);
9384     ret = ctxt->node;
9385 
9386 #ifdef LIBXML_VALID_ENABLED
9387     /*
9388      * [ VC: Root Element Type ]
9389      * The Name in the document type declaration must match the element
9390      * type of the root element.
9391      */
9392     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9393         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9394         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9395 #endif /* LIBXML_VALID_ENABLED */
9396 
9397     /*
9398      * Check for an Empty Element.
9399      */
9400     if ((RAW == '/') && (NXT(1) == '>')) {
9401         SKIP(2);
9402 	if (ctxt->sax2) {
9403 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9404 		(!ctxt->disableSAX))
9405 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9406 #ifdef LIBXML_SAX1_ENABLED
9407 	} else {
9408 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9409 		(!ctxt->disableSAX))
9410 		ctxt->sax->endElement(ctxt->userData, name);
9411 #endif /* LIBXML_SAX1_ENABLED */
9412 	}
9413 	namePop(ctxt);
9414 	spacePop(ctxt);
9415 	if (nsNr != ctxt->nsNr)
9416 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9417 	if ( ret != NULL && ctxt->record_info ) {
9418 	   node_info.end_pos = ctxt->input->consumed +
9419 			      (CUR_PTR - ctxt->input->base);
9420 	   node_info.end_line = ctxt->input->line;
9421 	   node_info.node = ret;
9422 	   xmlParserAddNodeInfo(ctxt, &node_info);
9423 	}
9424 	return;
9425     }
9426     if (RAW == '>') {
9427         NEXT1;
9428     } else {
9429         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9430 		     "Couldn't find end of Start Tag %s line %d\n",
9431 		                name, line, NULL);
9432 
9433 	/*
9434 	 * end of parsing of this node.
9435 	 */
9436 	nodePop(ctxt);
9437 	namePop(ctxt);
9438 	spacePop(ctxt);
9439 	if (nsNr != ctxt->nsNr)
9440 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9441 
9442 	/*
9443 	 * Capture end position and add node
9444 	 */
9445 	if ( ret != NULL && ctxt->record_info ) {
9446 	   node_info.end_pos = ctxt->input->consumed +
9447 			      (CUR_PTR - ctxt->input->base);
9448 	   node_info.end_line = ctxt->input->line;
9449 	   node_info.node = ret;
9450 	   xmlParserAddNodeInfo(ctxt, &node_info);
9451 	}
9452 	return;
9453     }
9454 
9455     /*
9456      * Parse the content of the element:
9457      */
9458     xmlParseContent(ctxt);
9459     if (!IS_BYTE_CHAR(RAW)) {
9460         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9461 	 "Premature end of data in tag %s line %d\n",
9462 		                name, line, NULL);
9463 
9464 	/*
9465 	 * end of parsing of this node.
9466 	 */
9467 	nodePop(ctxt);
9468 	namePop(ctxt);
9469 	spacePop(ctxt);
9470 	if (nsNr != ctxt->nsNr)
9471 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9472 	return;
9473     }
9474 
9475     /*
9476      * parse the end of tag: '</' should be here.
9477      */
9478     if (ctxt->sax2) {
9479 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9480 	namePop(ctxt);
9481     }
9482 #ifdef LIBXML_SAX1_ENABLED
9483       else
9484 	xmlParseEndTag1(ctxt, line);
9485 #endif /* LIBXML_SAX1_ENABLED */
9486 
9487     /*
9488      * Capture end position and add node
9489      */
9490     if ( ret != NULL && ctxt->record_info ) {
9491        node_info.end_pos = ctxt->input->consumed +
9492                           (CUR_PTR - ctxt->input->base);
9493        node_info.end_line = ctxt->input->line;
9494        node_info.node = ret;
9495        xmlParserAddNodeInfo(ctxt, &node_info);
9496     }
9497 }
9498 
9499 /**
9500  * xmlParseVersionNum:
9501  * @ctxt:  an XML parser context
9502  *
9503  * parse the XML version value.
9504  *
9505  * [26] VersionNum ::= '1.' [0-9]+
9506  *
9507  * In practice allow [0-9].[0-9]+ at that level
9508  *
9509  * Returns the string giving the XML version number, or NULL
9510  */
9511 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9512 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9513     xmlChar *buf = NULL;
9514     int len = 0;
9515     int size = 10;
9516     xmlChar cur;
9517 
9518     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9519     if (buf == NULL) {
9520 	xmlErrMemory(ctxt, NULL);
9521 	return(NULL);
9522     }
9523     cur = CUR;
9524     if (!((cur >= '0') && (cur <= '9'))) {
9525 	xmlFree(buf);
9526 	return(NULL);
9527     }
9528     buf[len++] = cur;
9529     NEXT;
9530     cur=CUR;
9531     if (cur != '.') {
9532 	xmlFree(buf);
9533 	return(NULL);
9534     }
9535     buf[len++] = cur;
9536     NEXT;
9537     cur=CUR;
9538     while ((cur >= '0') && (cur <= '9')) {
9539 	if (len + 1 >= size) {
9540 	    xmlChar *tmp;
9541 
9542 	    size *= 2;
9543 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9544 	    if (tmp == NULL) {
9545 	        xmlFree(buf);
9546 		xmlErrMemory(ctxt, NULL);
9547 		return(NULL);
9548 	    }
9549 	    buf = tmp;
9550 	}
9551 	buf[len++] = cur;
9552 	NEXT;
9553 	cur=CUR;
9554     }
9555     buf[len] = 0;
9556     return(buf);
9557 }
9558 
9559 /**
9560  * xmlParseVersionInfo:
9561  * @ctxt:  an XML parser context
9562  *
9563  * parse the XML version.
9564  *
9565  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9566  *
9567  * [25] Eq ::= S? '=' S?
9568  *
9569  * Returns the version string, e.g. "1.0"
9570  */
9571 
9572 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9573 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9574     xmlChar *version = NULL;
9575 
9576     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9577 	SKIP(7);
9578 	SKIP_BLANKS;
9579 	if (RAW != '=') {
9580 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9581 	    return(NULL);
9582         }
9583 	NEXT;
9584 	SKIP_BLANKS;
9585 	if (RAW == '"') {
9586 	    NEXT;
9587 	    version = xmlParseVersionNum(ctxt);
9588 	    if (RAW != '"') {
9589 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9590 	    } else
9591 	        NEXT;
9592 	} else if (RAW == '\''){
9593 	    NEXT;
9594 	    version = xmlParseVersionNum(ctxt);
9595 	    if (RAW != '\'') {
9596 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9597 	    } else
9598 	        NEXT;
9599 	} else {
9600 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9601 	}
9602     }
9603     return(version);
9604 }
9605 
9606 /**
9607  * xmlParseEncName:
9608  * @ctxt:  an XML parser context
9609  *
9610  * parse the XML encoding name
9611  *
9612  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9613  *
9614  * Returns the encoding name value or NULL
9615  */
9616 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9617 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9618     xmlChar *buf = NULL;
9619     int len = 0;
9620     int size = 10;
9621     xmlChar cur;
9622 
9623     cur = CUR;
9624     if (((cur >= 'a') && (cur <= 'z')) ||
9625         ((cur >= 'A') && (cur <= 'Z'))) {
9626 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9627 	if (buf == NULL) {
9628 	    xmlErrMemory(ctxt, NULL);
9629 	    return(NULL);
9630 	}
9631 
9632 	buf[len++] = cur;
9633 	NEXT;
9634 	cur = CUR;
9635 	while (((cur >= 'a') && (cur <= 'z')) ||
9636 	       ((cur >= 'A') && (cur <= 'Z')) ||
9637 	       ((cur >= '0') && (cur <= '9')) ||
9638 	       (cur == '.') || (cur == '_') ||
9639 	       (cur == '-')) {
9640 	    if (len + 1 >= size) {
9641 	        xmlChar *tmp;
9642 
9643 		size *= 2;
9644 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9645 		if (tmp == NULL) {
9646 		    xmlErrMemory(ctxt, NULL);
9647 		    xmlFree(buf);
9648 		    return(NULL);
9649 		}
9650 		buf = tmp;
9651 	    }
9652 	    buf[len++] = cur;
9653 	    NEXT;
9654 	    cur = CUR;
9655 	    if (cur == 0) {
9656 	        SHRINK;
9657 		GROW;
9658 		cur = CUR;
9659 	    }
9660         }
9661 	buf[len] = 0;
9662     } else {
9663 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9664     }
9665     return(buf);
9666 }
9667 
9668 /**
9669  * xmlParseEncodingDecl:
9670  * @ctxt:  an XML parser context
9671  *
9672  * parse the XML encoding declaration
9673  *
9674  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
9675  *
9676  * this setups the conversion filters.
9677  *
9678  * Returns the encoding value or NULL
9679  */
9680 
9681 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9682 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9683     xmlChar *encoding = NULL;
9684 
9685     SKIP_BLANKS;
9686     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9687 	SKIP(8);
9688 	SKIP_BLANKS;
9689 	if (RAW != '=') {
9690 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9691 	    return(NULL);
9692         }
9693 	NEXT;
9694 	SKIP_BLANKS;
9695 	if (RAW == '"') {
9696 	    NEXT;
9697 	    encoding = xmlParseEncName(ctxt);
9698 	    if (RAW != '"') {
9699 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9700 	    } else
9701 	        NEXT;
9702 	} else if (RAW == '\''){
9703 	    NEXT;
9704 	    encoding = xmlParseEncName(ctxt);
9705 	    if (RAW != '\'') {
9706 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9707 	    } else
9708 	        NEXT;
9709 	} else {
9710 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9711 	}
9712 	/*
9713 	 * UTF-16 encoding stwich has already taken place at this stage,
9714 	 * more over the little-endian/big-endian selection is already done
9715 	 */
9716         if ((encoding != NULL) &&
9717 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9718 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9719 	    /*
9720 	     * If no encoding was passed to the parser, that we are
9721 	     * using UTF-16 and no decoder is present i.e. the
9722 	     * document is apparently UTF-8 compatible, then raise an
9723 	     * encoding mismatch fatal error
9724 	     */
9725 	    if ((ctxt->encoding == NULL) &&
9726 	        (ctxt->input->buf != NULL) &&
9727 	        (ctxt->input->buf->encoder == NULL)) {
9728 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9729 		  "Document labelled UTF-16 but has UTF-8 content\n");
9730 	    }
9731 	    if (ctxt->encoding != NULL)
9732 		xmlFree((xmlChar *) ctxt->encoding);
9733 	    ctxt->encoding = encoding;
9734 	}
9735 	/*
9736 	 * UTF-8 encoding is handled natively
9737 	 */
9738         else if ((encoding != NULL) &&
9739 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9740 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9741 	    if (ctxt->encoding != NULL)
9742 		xmlFree((xmlChar *) ctxt->encoding);
9743 	    ctxt->encoding = encoding;
9744 	}
9745 	else if (encoding != NULL) {
9746 	    xmlCharEncodingHandlerPtr handler;
9747 
9748 	    if (ctxt->input->encoding != NULL)
9749 		xmlFree((xmlChar *) ctxt->input->encoding);
9750 	    ctxt->input->encoding = encoding;
9751 
9752             handler = xmlFindCharEncodingHandler((const char *) encoding);
9753 	    if (handler != NULL) {
9754 		xmlSwitchToEncoding(ctxt, handler);
9755 	    } else {
9756 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9757 			"Unsupported encoding %s\n", encoding);
9758 		return(NULL);
9759 	    }
9760 	}
9761     }
9762     return(encoding);
9763 }
9764 
9765 /**
9766  * xmlParseSDDecl:
9767  * @ctxt:  an XML parser context
9768  *
9769  * parse the XML standalone declaration
9770  *
9771  * [32] SDDecl ::= S 'standalone' Eq
9772  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9773  *
9774  * [ VC: Standalone Document Declaration ]
9775  * TODO The standalone document declaration must have the value "no"
9776  * if any external markup declarations contain declarations of:
9777  *  - attributes with default values, if elements to which these
9778  *    attributes apply appear in the document without specifications
9779  *    of values for these attributes, or
9780  *  - entities (other than amp, lt, gt, apos, quot), if references
9781  *    to those entities appear in the document, or
9782  *  - attributes with values subject to normalization, where the
9783  *    attribute appears in the document with a value which will change
9784  *    as a result of normalization, or
9785  *  - element types with element content, if white space occurs directly
9786  *    within any instance of those types.
9787  *
9788  * Returns:
9789  *   1 if standalone="yes"
9790  *   0 if standalone="no"
9791  *  -2 if standalone attribute is missing or invalid
9792  *	  (A standalone value of -2 means that the XML declaration was found,
9793  *	   but no value was specified for the standalone attribute).
9794  */
9795 
9796 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)9797 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9798     int standalone = -2;
9799 
9800     SKIP_BLANKS;
9801     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9802 	SKIP(10);
9803         SKIP_BLANKS;
9804 	if (RAW != '=') {
9805 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9806 	    return(standalone);
9807         }
9808 	NEXT;
9809 	SKIP_BLANKS;
9810         if (RAW == '\''){
9811 	    NEXT;
9812 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
9813 	        standalone = 0;
9814                 SKIP(2);
9815 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9816 	               (NXT(2) == 's')) {
9817 	        standalone = 1;
9818 		SKIP(3);
9819             } else {
9820 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9821 	    }
9822 	    if (RAW != '\'') {
9823 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9824 	    } else
9825 	        NEXT;
9826 	} else if (RAW == '"'){
9827 	    NEXT;
9828 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
9829 	        standalone = 0;
9830 		SKIP(2);
9831 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9832 	               (NXT(2) == 's')) {
9833 	        standalone = 1;
9834                 SKIP(3);
9835             } else {
9836 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9837 	    }
9838 	    if (RAW != '"') {
9839 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9840 	    } else
9841 	        NEXT;
9842 	} else {
9843 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9844         }
9845     }
9846     return(standalone);
9847 }
9848 
9849 /**
9850  * xmlParseXMLDecl:
9851  * @ctxt:  an XML parser context
9852  *
9853  * parse an XML declaration header
9854  *
9855  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9856  */
9857 
9858 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)9859 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9860     xmlChar *version;
9861 
9862     /*
9863      * This value for standalone indicates that the document has an
9864      * XML declaration but it does not have a standalone attribute.
9865      * It will be overwritten later if a standalone attribute is found.
9866      */
9867     ctxt->input->standalone = -2;
9868 
9869     /*
9870      * We know that '<?xml' is here.
9871      */
9872     SKIP(5);
9873 
9874     if (!IS_BLANK_CH(RAW)) {
9875 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9876 	               "Blank needed after '<?xml'\n");
9877     }
9878     SKIP_BLANKS;
9879 
9880     /*
9881      * We must have the VersionInfo here.
9882      */
9883     version = xmlParseVersionInfo(ctxt);
9884     if (version == NULL) {
9885 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9886     } else {
9887 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9888 	    /*
9889 	     * Changed here for XML-1.0 5th edition
9890 	     */
9891 	    if (ctxt->options & XML_PARSE_OLD10) {
9892 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9893 			          "Unsupported version '%s'\n",
9894 			          version);
9895 	    } else {
9896 	        if ((version[0] == '1') && ((version[1] == '.'))) {
9897 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9898 		                  "Unsupported version '%s'\n",
9899 				  version, NULL);
9900 		} else {
9901 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9902 				      "Unsupported version '%s'\n",
9903 				      version);
9904 		}
9905 	    }
9906 	}
9907 	if (ctxt->version != NULL)
9908 	    xmlFree((void *) ctxt->version);
9909 	ctxt->version = version;
9910     }
9911 
9912     /*
9913      * We may have the encoding declaration
9914      */
9915     if (!IS_BLANK_CH(RAW)) {
9916         if ((RAW == '?') && (NXT(1) == '>')) {
9917 	    SKIP(2);
9918 	    return;
9919 	}
9920 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9921     }
9922     xmlParseEncodingDecl(ctxt);
9923     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9924 	/*
9925 	 * The XML REC instructs us to stop parsing right here
9926 	 */
9927         return;
9928     }
9929 
9930     /*
9931      * We may have the standalone status.
9932      */
9933     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9934         if ((RAW == '?') && (NXT(1) == '>')) {
9935 	    SKIP(2);
9936 	    return;
9937 	}
9938 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9939     }
9940     SKIP_BLANKS;
9941     ctxt->input->standalone = xmlParseSDDecl(ctxt);
9942 
9943     SKIP_BLANKS;
9944     if ((RAW == '?') && (NXT(1) == '>')) {
9945         SKIP(2);
9946     } else if (RAW == '>') {
9947         /* Deprecated old WD ... */
9948 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9949 	NEXT;
9950     } else {
9951 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9952 	MOVETO_ENDTAG(CUR_PTR);
9953 	NEXT;
9954     }
9955 }
9956 
9957 /**
9958  * xmlParseMisc:
9959  * @ctxt:  an XML parser context
9960  *
9961  * parse an XML Misc* optional field.
9962  *
9963  * [27] Misc ::= Comment | PI |  S
9964  */
9965 
9966 void
xmlParseMisc(xmlParserCtxtPtr ctxt)9967 xmlParseMisc(xmlParserCtxtPtr ctxt) {
9968     while (((RAW == '<') && (NXT(1) == '?')) ||
9969            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9970            IS_BLANK_CH(CUR)) {
9971         if ((RAW == '<') && (NXT(1) == '?')) {
9972 	    xmlParsePI(ctxt);
9973 	} else if (IS_BLANK_CH(CUR)) {
9974 	    NEXT;
9975 	} else
9976 	    xmlParseComment(ctxt);
9977     }
9978 }
9979 
9980 /**
9981  * xmlParseDocument:
9982  * @ctxt:  an XML parser context
9983  *
9984  * parse an XML document (and build a tree if using the standard SAX
9985  * interface).
9986  *
9987  * [1] document ::= prolog element Misc*
9988  *
9989  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9990  *
9991  * Returns 0, -1 in case of error. the parser context is augmented
9992  *                as a result of the parsing.
9993  */
9994 
9995 int
xmlParseDocument(xmlParserCtxtPtr ctxt)9996 xmlParseDocument(xmlParserCtxtPtr ctxt) {
9997     xmlChar start[4];
9998     xmlCharEncoding enc;
9999 
10000     xmlInitParser();
10001 
10002     if ((ctxt == NULL) || (ctxt->input == NULL))
10003         return(-1);
10004 
10005     GROW;
10006 
10007     /*
10008      * SAX: detecting the level.
10009      */
10010     xmlDetectSAX2(ctxt);
10011 
10012     /*
10013      * SAX: beginning of the document processing.
10014      */
10015     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10016         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10017 
10018     if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10019         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10020 	/*
10021 	 * Get the 4 first bytes and decode the charset
10022 	 * if enc != XML_CHAR_ENCODING_NONE
10023 	 * plug some encoding conversion routines.
10024 	 */
10025 	start[0] = RAW;
10026 	start[1] = NXT(1);
10027 	start[2] = NXT(2);
10028 	start[3] = NXT(3);
10029 	enc = xmlDetectCharEncoding(&start[0], 4);
10030 	if (enc != XML_CHAR_ENCODING_NONE) {
10031 	    xmlSwitchEncoding(ctxt, enc);
10032 	}
10033     }
10034 
10035 
10036     if (CUR == 0) {
10037 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10038     }
10039 
10040     /*
10041      * Check for the XMLDecl in the Prolog.
10042      */
10043     GROW;
10044     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10045 
10046 	/*
10047 	 * Note that we will switch encoding on the fly.
10048 	 */
10049 	xmlParseXMLDecl(ctxt);
10050 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10051 	    /*
10052 	     * The XML REC instructs us to stop parsing right here
10053 	     */
10054 	    return(-1);
10055 	}
10056 	ctxt->standalone = ctxt->input->standalone;
10057 	SKIP_BLANKS;
10058     } else {
10059 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10060     }
10061     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10062         ctxt->sax->startDocument(ctxt->userData);
10063 
10064     /*
10065      * The Misc part of the Prolog
10066      */
10067     GROW;
10068     xmlParseMisc(ctxt);
10069 
10070     /*
10071      * Then possibly doc type declaration(s) and more Misc
10072      * (doctypedecl Misc*)?
10073      */
10074     GROW;
10075     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10076 
10077 	ctxt->inSubset = 1;
10078 	xmlParseDocTypeDecl(ctxt);
10079 	if (RAW == '[') {
10080 	    ctxt->instate = XML_PARSER_DTD;
10081 	    xmlParseInternalSubset(ctxt);
10082 	}
10083 
10084 	/*
10085 	 * Create and update the external subset.
10086 	 */
10087 	ctxt->inSubset = 2;
10088 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10089 	    (!ctxt->disableSAX))
10090 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10091 	                              ctxt->extSubSystem, ctxt->extSubURI);
10092 	ctxt->inSubset = 0;
10093 
10094         xmlCleanSpecialAttr(ctxt);
10095 
10096 	ctxt->instate = XML_PARSER_PROLOG;
10097 	xmlParseMisc(ctxt);
10098     }
10099 
10100     /*
10101      * Time to start parsing the tree itself
10102      */
10103     GROW;
10104     if (RAW != '<') {
10105 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10106 		       "Start tag expected, '<' not found\n");
10107     } else {
10108 	ctxt->instate = XML_PARSER_CONTENT;
10109 	xmlParseElement(ctxt);
10110 	ctxt->instate = XML_PARSER_EPILOG;
10111 
10112 
10113 	/*
10114 	 * The Misc part at the end
10115 	 */
10116 	xmlParseMisc(ctxt);
10117 
10118 	if (RAW != 0) {
10119 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10120 	}
10121 	ctxt->instate = XML_PARSER_EOF;
10122     }
10123 
10124     /*
10125      * SAX: end of the document processing.
10126      */
10127     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10128         ctxt->sax->endDocument(ctxt->userData);
10129 
10130     /*
10131      * Remove locally kept entity definitions if the tree was not built
10132      */
10133     if ((ctxt->myDoc != NULL) &&
10134 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10135 	xmlFreeDoc(ctxt->myDoc);
10136 	ctxt->myDoc = NULL;
10137     }
10138 
10139     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10140         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10141 	if (ctxt->valid)
10142 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10143 	if (ctxt->nsWellFormed)
10144 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10145 	if (ctxt->options & XML_PARSE_OLD10)
10146 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10147     }
10148     if (! ctxt->wellFormed) {
10149 	ctxt->valid = 0;
10150 	return(-1);
10151     }
10152     return(0);
10153 }
10154 
10155 /**
10156  * xmlParseExtParsedEnt:
10157  * @ctxt:  an XML parser context
10158  *
10159  * parse a general parsed entity
10160  * An external general parsed entity is well-formed if it matches the
10161  * production labeled extParsedEnt.
10162  *
10163  * [78] extParsedEnt ::= TextDecl? content
10164  *
10165  * Returns 0, -1 in case of error. the parser context is augmented
10166  *                as a result of the parsing.
10167  */
10168 
10169 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10170 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10171     xmlChar start[4];
10172     xmlCharEncoding enc;
10173 
10174     if ((ctxt == NULL) || (ctxt->input == NULL))
10175         return(-1);
10176 
10177     xmlDefaultSAXHandlerInit();
10178 
10179     xmlDetectSAX2(ctxt);
10180 
10181     GROW;
10182 
10183     /*
10184      * SAX: beginning of the document processing.
10185      */
10186     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10187         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10188 
10189     /*
10190      * Get the 4 first bytes and decode the charset
10191      * if enc != XML_CHAR_ENCODING_NONE
10192      * plug some encoding conversion routines.
10193      */
10194     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10195 	start[0] = RAW;
10196 	start[1] = NXT(1);
10197 	start[2] = NXT(2);
10198 	start[3] = NXT(3);
10199 	enc = xmlDetectCharEncoding(start, 4);
10200 	if (enc != XML_CHAR_ENCODING_NONE) {
10201 	    xmlSwitchEncoding(ctxt, enc);
10202 	}
10203     }
10204 
10205 
10206     if (CUR == 0) {
10207 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10208     }
10209 
10210     /*
10211      * Check for the XMLDecl in the Prolog.
10212      */
10213     GROW;
10214     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10215 
10216 	/*
10217 	 * Note that we will switch encoding on the fly.
10218 	 */
10219 	xmlParseXMLDecl(ctxt);
10220 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10221 	    /*
10222 	     * The XML REC instructs us to stop parsing right here
10223 	     */
10224 	    return(-1);
10225 	}
10226 	SKIP_BLANKS;
10227     } else {
10228 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10229     }
10230     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10231         ctxt->sax->startDocument(ctxt->userData);
10232 
10233     /*
10234      * Doing validity checking on chunk doesn't make sense
10235      */
10236     ctxt->instate = XML_PARSER_CONTENT;
10237     ctxt->validate = 0;
10238     ctxt->loadsubset = 0;
10239     ctxt->depth = 0;
10240 
10241     xmlParseContent(ctxt);
10242 
10243     if ((RAW == '<') && (NXT(1) == '/')) {
10244 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10245     } else if (RAW != 0) {
10246 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10247     }
10248 
10249     /*
10250      * SAX: end of the document processing.
10251      */
10252     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10253         ctxt->sax->endDocument(ctxt->userData);
10254 
10255     if (! ctxt->wellFormed) return(-1);
10256     return(0);
10257 }
10258 
10259 #ifdef LIBXML_PUSH_ENABLED
10260 /************************************************************************
10261  *									*
10262  * 		Progressive parsing interfaces				*
10263  *									*
10264  ************************************************************************/
10265 
10266 /**
10267  * xmlParseLookupSequence:
10268  * @ctxt:  an XML parser context
10269  * @first:  the first char to lookup
10270  * @next:  the next char to lookup or zero
10271  * @third:  the next char to lookup or zero
10272  *
10273  * Try to find if a sequence (first, next, third) or  just (first next) or
10274  * (first) is available in the input stream.
10275  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10276  * to avoid rescanning sequences of bytes, it DOES change the state of the
10277  * parser, do not use liberally.
10278  *
10279  * Returns the index to the current parsing point if the full sequence
10280  *      is available, -1 otherwise.
10281  */
10282 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10283 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10284                        xmlChar next, xmlChar third) {
10285     int base, len;
10286     xmlParserInputPtr in;
10287     const xmlChar *buf;
10288 
10289     in = ctxt->input;
10290     if (in == NULL) return(-1);
10291     base = in->cur - in->base;
10292     if (base < 0) return(-1);
10293     if (ctxt->checkIndex > base)
10294         base = ctxt->checkIndex;
10295     if (in->buf == NULL) {
10296 	buf = in->base;
10297 	len = in->length;
10298     } else {
10299 	buf = in->buf->buffer->content;
10300 	len = in->buf->buffer->use;
10301     }
10302     /* take into account the sequence length */
10303     if (third) len -= 2;
10304     else if (next) len --;
10305     for (;base < len;base++) {
10306         if (buf[base] == first) {
10307 	    if (third != 0) {
10308 		if ((buf[base + 1] != next) ||
10309 		    (buf[base + 2] != third)) continue;
10310 	    } else if (next != 0) {
10311 		if (buf[base + 1] != next) continue;
10312 	    }
10313 	    ctxt->checkIndex = 0;
10314 #ifdef DEBUG_PUSH
10315 	    if (next == 0)
10316 		xmlGenericError(xmlGenericErrorContext,
10317 			"PP: lookup '%c' found at %d\n",
10318 			first, base);
10319 	    else if (third == 0)
10320 		xmlGenericError(xmlGenericErrorContext,
10321 			"PP: lookup '%c%c' found at %d\n",
10322 			first, next, base);
10323 	    else
10324 		xmlGenericError(xmlGenericErrorContext,
10325 			"PP: lookup '%c%c%c' found at %d\n",
10326 			first, next, third, base);
10327 #endif
10328 	    return(base - (in->cur - in->base));
10329 	}
10330     }
10331     ctxt->checkIndex = base;
10332 #ifdef DEBUG_PUSH
10333     if (next == 0)
10334 	xmlGenericError(xmlGenericErrorContext,
10335 		"PP: lookup '%c' failed\n", first);
10336     else if (third == 0)
10337 	xmlGenericError(xmlGenericErrorContext,
10338 		"PP: lookup '%c%c' failed\n", first, next);
10339     else
10340 	xmlGenericError(xmlGenericErrorContext,
10341 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10342 #endif
10343     return(-1);
10344 }
10345 
10346 /**
10347  * xmlParseGetLasts:
10348  * @ctxt:  an XML parser context
10349  * @lastlt:  pointer to store the last '<' from the input
10350  * @lastgt:  pointer to store the last '>' from the input
10351  *
10352  * Lookup the last < and > in the current chunk
10353  */
10354 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10355 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10356                  const xmlChar **lastgt) {
10357     const xmlChar *tmp;
10358 
10359     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10360 	xmlGenericError(xmlGenericErrorContext,
10361 		    "Internal error: xmlParseGetLasts\n");
10362 	return;
10363     }
10364     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10365         tmp = ctxt->input->end;
10366 	tmp--;
10367 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10368 	if (tmp < ctxt->input->base) {
10369 	    *lastlt = NULL;
10370 	    *lastgt = NULL;
10371 	} else {
10372 	    *lastlt = tmp;
10373 	    tmp++;
10374 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10375 	        if (*tmp == '\'') {
10376 		    tmp++;
10377 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10378 		    if (tmp < ctxt->input->end) tmp++;
10379 		} else if (*tmp == '"') {
10380 		    tmp++;
10381 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10382 		    if (tmp < ctxt->input->end) tmp++;
10383 		} else
10384 		    tmp++;
10385 	    }
10386 	    if (tmp < ctxt->input->end)
10387 	        *lastgt = tmp;
10388 	    else {
10389 	        tmp = *lastlt;
10390 		tmp--;
10391 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10392 		if (tmp >= ctxt->input->base)
10393 		    *lastgt = tmp;
10394 		else
10395 		    *lastgt = NULL;
10396 	    }
10397 	}
10398     } else {
10399         *lastlt = NULL;
10400 	*lastgt = NULL;
10401     }
10402 }
10403 /**
10404  * xmlCheckCdataPush:
10405  * @cur: pointer to the bock of characters
10406  * @len: length of the block in bytes
10407  *
10408  * Check that the block of characters is okay as SCdata content [20]
10409  *
10410  * Returns the number of bytes to pass if okay, a negative index where an
10411  *         UTF-8 error occured otherwise
10412  */
10413 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10414 xmlCheckCdataPush(const xmlChar *utf, int len) {
10415     int ix;
10416     unsigned char c;
10417     int codepoint;
10418 
10419     if ((utf == NULL) || (len <= 0))
10420         return(0);
10421 
10422     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10423         c = utf[ix];
10424         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10425 	    if (c >= 0x20)
10426 		ix++;
10427 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10428 	        ix++;
10429 	    else
10430 	        return(-ix);
10431 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10432 	    if (ix + 2 > len) return(ix);
10433 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10434 	        return(-ix);
10435 	    codepoint = (utf[ix] & 0x1f) << 6;
10436 	    codepoint |= utf[ix+1] & 0x3f;
10437 	    if (!xmlIsCharQ(codepoint))
10438 	        return(-ix);
10439 	    ix += 2;
10440 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10441 	    if (ix + 3 > len) return(ix);
10442 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10443 	        ((utf[ix+2] & 0xc0) != 0x80))
10444 		    return(-ix);
10445 	    codepoint = (utf[ix] & 0xf) << 12;
10446 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
10447 	    codepoint |= utf[ix+2] & 0x3f;
10448 	    if (!xmlIsCharQ(codepoint))
10449 	        return(-ix);
10450 	    ix += 3;
10451 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10452 	    if (ix + 4 > len) return(ix);
10453 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10454 	        ((utf[ix+2] & 0xc0) != 0x80) ||
10455 		((utf[ix+3] & 0xc0) != 0x80))
10456 		    return(-ix);
10457 	    codepoint = (utf[ix] & 0x7) << 18;
10458 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
10459 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
10460 	    codepoint |= utf[ix+3] & 0x3f;
10461 	    if (!xmlIsCharQ(codepoint))
10462 	        return(-ix);
10463 	    ix += 4;
10464 	} else				/* unknown encoding */
10465 	    return(-ix);
10466       }
10467       return(ix);
10468 }
10469 
10470 /**
10471  * xmlParseTryOrFinish:
10472  * @ctxt:  an XML parser context
10473  * @terminate:  last chunk indicator
10474  *
10475  * Try to progress on parsing
10476  *
10477  * Returns zero if no parsing was possible
10478  */
10479 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10480 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10481     int ret = 0;
10482     int avail, tlen;
10483     xmlChar cur, next;
10484     const xmlChar *lastlt, *lastgt;
10485 
10486     if (ctxt->input == NULL)
10487         return(0);
10488 
10489 #ifdef DEBUG_PUSH
10490     switch (ctxt->instate) {
10491 	case XML_PARSER_EOF:
10492 	    xmlGenericError(xmlGenericErrorContext,
10493 		    "PP: try EOF\n"); break;
10494 	case XML_PARSER_START:
10495 	    xmlGenericError(xmlGenericErrorContext,
10496 		    "PP: try START\n"); break;
10497 	case XML_PARSER_MISC:
10498 	    xmlGenericError(xmlGenericErrorContext,
10499 		    "PP: try MISC\n");break;
10500 	case XML_PARSER_COMMENT:
10501 	    xmlGenericError(xmlGenericErrorContext,
10502 		    "PP: try COMMENT\n");break;
10503 	case XML_PARSER_PROLOG:
10504 	    xmlGenericError(xmlGenericErrorContext,
10505 		    "PP: try PROLOG\n");break;
10506 	case XML_PARSER_START_TAG:
10507 	    xmlGenericError(xmlGenericErrorContext,
10508 		    "PP: try START_TAG\n");break;
10509 	case XML_PARSER_CONTENT:
10510 	    xmlGenericError(xmlGenericErrorContext,
10511 		    "PP: try CONTENT\n");break;
10512 	case XML_PARSER_CDATA_SECTION:
10513 	    xmlGenericError(xmlGenericErrorContext,
10514 		    "PP: try CDATA_SECTION\n");break;
10515 	case XML_PARSER_END_TAG:
10516 	    xmlGenericError(xmlGenericErrorContext,
10517 		    "PP: try END_TAG\n");break;
10518 	case XML_PARSER_ENTITY_DECL:
10519 	    xmlGenericError(xmlGenericErrorContext,
10520 		    "PP: try ENTITY_DECL\n");break;
10521 	case XML_PARSER_ENTITY_VALUE:
10522 	    xmlGenericError(xmlGenericErrorContext,
10523 		    "PP: try ENTITY_VALUE\n");break;
10524 	case XML_PARSER_ATTRIBUTE_VALUE:
10525 	    xmlGenericError(xmlGenericErrorContext,
10526 		    "PP: try ATTRIBUTE_VALUE\n");break;
10527 	case XML_PARSER_DTD:
10528 	    xmlGenericError(xmlGenericErrorContext,
10529 		    "PP: try DTD\n");break;
10530 	case XML_PARSER_EPILOG:
10531 	    xmlGenericError(xmlGenericErrorContext,
10532 		    "PP: try EPILOG\n");break;
10533 	case XML_PARSER_PI:
10534 	    xmlGenericError(xmlGenericErrorContext,
10535 		    "PP: try PI\n");break;
10536         case XML_PARSER_IGNORE:
10537             xmlGenericError(xmlGenericErrorContext,
10538 		    "PP: try IGNORE\n");break;
10539     }
10540 #endif
10541 
10542     if ((ctxt->input != NULL) &&
10543         (ctxt->input->cur - ctxt->input->base > 4096)) {
10544 	xmlSHRINK(ctxt);
10545 	ctxt->checkIndex = 0;
10546     }
10547     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10548 
10549     while (1) {
10550 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10551 	    return(0);
10552 
10553 
10554 	/*
10555 	 * Pop-up of finished entities.
10556 	 */
10557 	while ((RAW == 0) && (ctxt->inputNr > 1))
10558 	    xmlPopInput(ctxt);
10559 
10560 	if (ctxt->input == NULL) break;
10561 	if (ctxt->input->buf == NULL)
10562 	    avail = ctxt->input->length -
10563 	            (ctxt->input->cur - ctxt->input->base);
10564 	else {
10565 	    /*
10566 	     * If we are operating on converted input, try to flush
10567 	     * remainng chars to avoid them stalling in the non-converted
10568 	     * buffer.
10569 	     */
10570 	    if ((ctxt->input->buf->raw != NULL) &&
10571 		(ctxt->input->buf->raw->use > 0)) {
10572 		int base = ctxt->input->base -
10573 		           ctxt->input->buf->buffer->content;
10574 		int current = ctxt->input->cur - ctxt->input->base;
10575 
10576 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10577 		ctxt->input->base = ctxt->input->buf->buffer->content + base;
10578 		ctxt->input->cur = ctxt->input->base + current;
10579 		ctxt->input->end =
10580 		    &ctxt->input->buf->buffer->content[
10581 		                       ctxt->input->buf->buffer->use];
10582 	    }
10583 	    avail = ctxt->input->buf->buffer->use -
10584 		    (ctxt->input->cur - ctxt->input->base);
10585 	}
10586         if (avail < 1)
10587 	    goto done;
10588         switch (ctxt->instate) {
10589             case XML_PARSER_EOF:
10590 	        /*
10591 		 * Document parsing is done !
10592 		 */
10593 	        goto done;
10594             case XML_PARSER_START:
10595 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10596 		    xmlChar start[4];
10597 		    xmlCharEncoding enc;
10598 
10599 		    /*
10600 		     * Very first chars read from the document flow.
10601 		     */
10602 		    if (avail < 4)
10603 			goto done;
10604 
10605 		    /*
10606 		     * Get the 4 first bytes and decode the charset
10607 		     * if enc != XML_CHAR_ENCODING_NONE
10608 		     * plug some encoding conversion routines,
10609 		     * else xmlSwitchEncoding will set to (default)
10610 		     * UTF8.
10611 		     */
10612 		    start[0] = RAW;
10613 		    start[1] = NXT(1);
10614 		    start[2] = NXT(2);
10615 		    start[3] = NXT(3);
10616 		    enc = xmlDetectCharEncoding(start, 4);
10617 		    xmlSwitchEncoding(ctxt, enc);
10618 		    break;
10619 		}
10620 
10621 		if (avail < 2)
10622 		    goto done;
10623 		cur = ctxt->input->cur[0];
10624 		next = ctxt->input->cur[1];
10625 		if (cur == 0) {
10626 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10627 			ctxt->sax->setDocumentLocator(ctxt->userData,
10628 						      &xmlDefaultSAXLocator);
10629 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10630 		    ctxt->instate = XML_PARSER_EOF;
10631 #ifdef DEBUG_PUSH
10632 		    xmlGenericError(xmlGenericErrorContext,
10633 			    "PP: entering EOF\n");
10634 #endif
10635 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10636 			ctxt->sax->endDocument(ctxt->userData);
10637 		    goto done;
10638 		}
10639 	        if ((cur == '<') && (next == '?')) {
10640 		    /* PI or XML decl */
10641 		    if (avail < 5) return(ret);
10642 		    if ((!terminate) &&
10643 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10644 			return(ret);
10645 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10646 			ctxt->sax->setDocumentLocator(ctxt->userData,
10647 						      &xmlDefaultSAXLocator);
10648 		    if ((ctxt->input->cur[2] == 'x') &&
10649 			(ctxt->input->cur[3] == 'm') &&
10650 			(ctxt->input->cur[4] == 'l') &&
10651 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
10652 			ret += 5;
10653 #ifdef DEBUG_PUSH
10654 			xmlGenericError(xmlGenericErrorContext,
10655 				"PP: Parsing XML Decl\n");
10656 #endif
10657 			xmlParseXMLDecl(ctxt);
10658 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10659 			    /*
10660 			     * The XML REC instructs us to stop parsing right
10661 			     * here
10662 			     */
10663 			    ctxt->instate = XML_PARSER_EOF;
10664 			    return(0);
10665 			}
10666 			ctxt->standalone = ctxt->input->standalone;
10667 			if ((ctxt->encoding == NULL) &&
10668 			    (ctxt->input->encoding != NULL))
10669 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10670 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10671 			    (!ctxt->disableSAX))
10672 			    ctxt->sax->startDocument(ctxt->userData);
10673 			ctxt->instate = XML_PARSER_MISC;
10674 #ifdef DEBUG_PUSH
10675 			xmlGenericError(xmlGenericErrorContext,
10676 				"PP: entering MISC\n");
10677 #endif
10678 		    } else {
10679 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10680 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10681 			    (!ctxt->disableSAX))
10682 			    ctxt->sax->startDocument(ctxt->userData);
10683 			ctxt->instate = XML_PARSER_MISC;
10684 #ifdef DEBUG_PUSH
10685 			xmlGenericError(xmlGenericErrorContext,
10686 				"PP: entering MISC\n");
10687 #endif
10688 		    }
10689 		} else {
10690 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10691 			ctxt->sax->setDocumentLocator(ctxt->userData,
10692 						      &xmlDefaultSAXLocator);
10693 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10694 		    if (ctxt->version == NULL) {
10695 		        xmlErrMemory(ctxt, NULL);
10696 			break;
10697 		    }
10698 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10699 		        (!ctxt->disableSAX))
10700 			ctxt->sax->startDocument(ctxt->userData);
10701 		    ctxt->instate = XML_PARSER_MISC;
10702 #ifdef DEBUG_PUSH
10703 		    xmlGenericError(xmlGenericErrorContext,
10704 			    "PP: entering MISC\n");
10705 #endif
10706 		}
10707 		break;
10708             case XML_PARSER_START_TAG: {
10709 	        const xmlChar *name;
10710 		const xmlChar *prefix;
10711 		const xmlChar *URI;
10712 		int nsNr = ctxt->nsNr;
10713 
10714 		if ((avail < 2) && (ctxt->inputNr == 1))
10715 		    goto done;
10716 		cur = ctxt->input->cur[0];
10717 	        if (cur != '<') {
10718 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10719 		    ctxt->instate = XML_PARSER_EOF;
10720 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10721 			ctxt->sax->endDocument(ctxt->userData);
10722 		    goto done;
10723 		}
10724 		if (!terminate) {
10725 		    if (ctxt->progressive) {
10726 		        /* > can be found unescaped in attribute values */
10727 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10728 			    goto done;
10729 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10730 			goto done;
10731 		    }
10732 		}
10733 		if (ctxt->spaceNr == 0)
10734 		    spacePush(ctxt, -1);
10735 		else if (*ctxt->space == -2)
10736 		    spacePush(ctxt, -1);
10737 		else
10738 		    spacePush(ctxt, *ctxt->space);
10739 #ifdef LIBXML_SAX1_ENABLED
10740 		if (ctxt->sax2)
10741 #endif /* LIBXML_SAX1_ENABLED */
10742 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10743 #ifdef LIBXML_SAX1_ENABLED
10744 		else
10745 		    name = xmlParseStartTag(ctxt);
10746 #endif /* LIBXML_SAX1_ENABLED */
10747 		if (name == NULL) {
10748 		    spacePop(ctxt);
10749 		    ctxt->instate = XML_PARSER_EOF;
10750 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10751 			ctxt->sax->endDocument(ctxt->userData);
10752 		    goto done;
10753 		}
10754 #ifdef LIBXML_VALID_ENABLED
10755 		/*
10756 		 * [ VC: Root Element Type ]
10757 		 * The Name in the document type declaration must match
10758 		 * the element type of the root element.
10759 		 */
10760 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10761 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
10762 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10763 #endif /* LIBXML_VALID_ENABLED */
10764 
10765 		/*
10766 		 * Check for an Empty Element.
10767 		 */
10768 		if ((RAW == '/') && (NXT(1) == '>')) {
10769 		    SKIP(2);
10770 
10771 		    if (ctxt->sax2) {
10772 			if ((ctxt->sax != NULL) &&
10773 			    (ctxt->sax->endElementNs != NULL) &&
10774 			    (!ctxt->disableSAX))
10775 			    ctxt->sax->endElementNs(ctxt->userData, name,
10776 			                            prefix, URI);
10777 			if (ctxt->nsNr - nsNr > 0)
10778 			    nsPop(ctxt, ctxt->nsNr - nsNr);
10779 #ifdef LIBXML_SAX1_ENABLED
10780 		    } else {
10781 			if ((ctxt->sax != NULL) &&
10782 			    (ctxt->sax->endElement != NULL) &&
10783 			    (!ctxt->disableSAX))
10784 			    ctxt->sax->endElement(ctxt->userData, name);
10785 #endif /* LIBXML_SAX1_ENABLED */
10786 		    }
10787 		    spacePop(ctxt);
10788 		    if (ctxt->nameNr == 0) {
10789 			ctxt->instate = XML_PARSER_EPILOG;
10790 		    } else {
10791 			ctxt->instate = XML_PARSER_CONTENT;
10792 		    }
10793 		    break;
10794 		}
10795 		if (RAW == '>') {
10796 		    NEXT;
10797 		} else {
10798 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10799 					 "Couldn't find end of Start Tag %s\n",
10800 					 name);
10801 		    nodePop(ctxt);
10802 		    spacePop(ctxt);
10803 		}
10804 		if (ctxt->sax2)
10805 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10806 #ifdef LIBXML_SAX1_ENABLED
10807 		else
10808 		    namePush(ctxt, name);
10809 #endif /* LIBXML_SAX1_ENABLED */
10810 
10811 		ctxt->instate = XML_PARSER_CONTENT;
10812                 break;
10813 	    }
10814             case XML_PARSER_CONTENT: {
10815 		const xmlChar *test;
10816 		unsigned int cons;
10817 		if ((avail < 2) && (ctxt->inputNr == 1))
10818 		    goto done;
10819 		cur = ctxt->input->cur[0];
10820 		next = ctxt->input->cur[1];
10821 
10822 		test = CUR_PTR;
10823 	        cons = ctxt->input->consumed;
10824 		if ((cur == '<') && (next == '/')) {
10825 		    ctxt->instate = XML_PARSER_END_TAG;
10826 		    break;
10827 	        } else if ((cur == '<') && (next == '?')) {
10828 		    if ((!terminate) &&
10829 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10830 			goto done;
10831 		    xmlParsePI(ctxt);
10832 		} else if ((cur == '<') && (next != '!')) {
10833 		    ctxt->instate = XML_PARSER_START_TAG;
10834 		    break;
10835 		} else if ((cur == '<') && (next == '!') &&
10836 		           (ctxt->input->cur[2] == '-') &&
10837 			   (ctxt->input->cur[3] == '-')) {
10838 		    int term;
10839 
10840 	            if (avail < 4)
10841 		        goto done;
10842 		    ctxt->input->cur += 4;
10843 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10844 		    ctxt->input->cur -= 4;
10845 		    if ((!terminate) && (term < 0))
10846 			goto done;
10847 		    xmlParseComment(ctxt);
10848 		    ctxt->instate = XML_PARSER_CONTENT;
10849 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10850 		    (ctxt->input->cur[2] == '[') &&
10851 		    (ctxt->input->cur[3] == 'C') &&
10852 		    (ctxt->input->cur[4] == 'D') &&
10853 		    (ctxt->input->cur[5] == 'A') &&
10854 		    (ctxt->input->cur[6] == 'T') &&
10855 		    (ctxt->input->cur[7] == 'A') &&
10856 		    (ctxt->input->cur[8] == '[')) {
10857 		    SKIP(9);
10858 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
10859 		    break;
10860 		} else if ((cur == '<') && (next == '!') &&
10861 		           (avail < 9)) {
10862 		    goto done;
10863 		} else if (cur == '&') {
10864 		    if ((!terminate) &&
10865 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10866 			goto done;
10867 		    xmlParseReference(ctxt);
10868 		} else {
10869 		    /* TODO Avoid the extra copy, handle directly !!! */
10870 		    /*
10871 		     * Goal of the following test is:
10872 		     *  - minimize calls to the SAX 'character' callback
10873 		     *    when they are mergeable
10874 		     *  - handle an problem for isBlank when we only parse
10875 		     *    a sequence of blank chars and the next one is
10876 		     *    not available to check against '<' presence.
10877 		     *  - tries to homogenize the differences in SAX
10878 		     *    callbacks between the push and pull versions
10879 		     *    of the parser.
10880 		     */
10881 		    if ((ctxt->inputNr == 1) &&
10882 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10883 			if (!terminate) {
10884 			    if (ctxt->progressive) {
10885 				if ((lastlt == NULL) ||
10886 				    (ctxt->input->cur > lastlt))
10887 				    goto done;
10888 			    } else if (xmlParseLookupSequence(ctxt,
10889 			                                      '<', 0, 0) < 0) {
10890 				goto done;
10891 			    }
10892 			}
10893                     }
10894 		    ctxt->checkIndex = 0;
10895 		    xmlParseCharData(ctxt, 0);
10896 		}
10897 		/*
10898 		 * Pop-up of finished entities.
10899 		 */
10900 		while ((RAW == 0) && (ctxt->inputNr > 1))
10901 		    xmlPopInput(ctxt);
10902 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10903 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10904 		                "detected an error in element content\n");
10905 		    ctxt->instate = XML_PARSER_EOF;
10906 		    break;
10907 		}
10908 		break;
10909 	    }
10910             case XML_PARSER_END_TAG:
10911 		if (avail < 2)
10912 		    goto done;
10913 		if (!terminate) {
10914 		    if (ctxt->progressive) {
10915 		        /* > can be found unescaped in attribute values */
10916 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10917 			    goto done;
10918 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10919 			goto done;
10920 		    }
10921 		}
10922 		if (ctxt->sax2) {
10923 		    xmlParseEndTag2(ctxt,
10924 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10925 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10926 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10927 		    nameNsPop(ctxt);
10928 		}
10929 #ifdef LIBXML_SAX1_ENABLED
10930 		  else
10931 		    xmlParseEndTag1(ctxt, 0);
10932 #endif /* LIBXML_SAX1_ENABLED */
10933 		if (ctxt->nameNr == 0) {
10934 		    ctxt->instate = XML_PARSER_EPILOG;
10935 		} else {
10936 		    ctxt->instate = XML_PARSER_CONTENT;
10937 		}
10938 		break;
10939             case XML_PARSER_CDATA_SECTION: {
10940 	        /*
10941 		 * The Push mode need to have the SAX callback for
10942 		 * cdataBlock merge back contiguous callbacks.
10943 		 */
10944 		int base;
10945 
10946 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10947 		if (base < 0) {
10948 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10949 		        int tmp;
10950 
10951 			tmp = xmlCheckCdataPush(ctxt->input->cur,
10952 			                        XML_PARSER_BIG_BUFFER_SIZE);
10953 			if (tmp < 0) {
10954 			    tmp = -tmp;
10955 			    ctxt->input->cur += tmp;
10956 			    goto encoding_error;
10957 			}
10958 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10959 			    if (ctxt->sax->cdataBlock != NULL)
10960 				ctxt->sax->cdataBlock(ctxt->userData,
10961 				                      ctxt->input->cur, tmp);
10962 			    else if (ctxt->sax->characters != NULL)
10963 				ctxt->sax->characters(ctxt->userData,
10964 				                      ctxt->input->cur, tmp);
10965 			}
10966 			SKIPL(tmp);
10967 			ctxt->checkIndex = 0;
10968 		    }
10969 		    goto done;
10970 		} else {
10971 		    int tmp;
10972 
10973 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10974 		    if ((tmp < 0) || (tmp != base)) {
10975 			tmp = -tmp;
10976 			ctxt->input->cur += tmp;
10977 			goto encoding_error;
10978 		    }
10979 		    if ((ctxt->sax != NULL) && (base == 0) &&
10980 		        (ctxt->sax->cdataBlock != NULL) &&
10981 		        (!ctxt->disableSAX)) {
10982 			/*
10983 			 * Special case to provide identical behaviour
10984 			 * between pull and push parsers on enpty CDATA
10985 			 * sections
10986 			 */
10987 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10988 			     (!strncmp((const char *)&ctxt->input->cur[-9],
10989 			               "<![CDATA[", 9)))
10990 			     ctxt->sax->cdataBlock(ctxt->userData,
10991 			                           BAD_CAST "", 0);
10992 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
10993 			(!ctxt->disableSAX)) {
10994 			if (ctxt->sax->cdataBlock != NULL)
10995 			    ctxt->sax->cdataBlock(ctxt->userData,
10996 						  ctxt->input->cur, base);
10997 			else if (ctxt->sax->characters != NULL)
10998 			    ctxt->sax->characters(ctxt->userData,
10999 						  ctxt->input->cur, base);
11000 		    }
11001 		    SKIPL(base + 3);
11002 		    ctxt->checkIndex = 0;
11003 		    ctxt->instate = XML_PARSER_CONTENT;
11004 #ifdef DEBUG_PUSH
11005 		    xmlGenericError(xmlGenericErrorContext,
11006 			    "PP: entering CONTENT\n");
11007 #endif
11008 		}
11009 		break;
11010 	    }
11011             case XML_PARSER_MISC:
11012 		SKIP_BLANKS;
11013 		if (ctxt->input->buf == NULL)
11014 		    avail = ctxt->input->length -
11015 		            (ctxt->input->cur - ctxt->input->base);
11016 		else
11017 		    avail = ctxt->input->buf->buffer->use -
11018 		            (ctxt->input->cur - ctxt->input->base);
11019 		if (avail < 2)
11020 		    goto done;
11021 		cur = ctxt->input->cur[0];
11022 		next = ctxt->input->cur[1];
11023 	        if ((cur == '<') && (next == '?')) {
11024 		    if ((!terminate) &&
11025 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11026 			goto done;
11027 #ifdef DEBUG_PUSH
11028 		    xmlGenericError(xmlGenericErrorContext,
11029 			    "PP: Parsing PI\n");
11030 #endif
11031 		    xmlParsePI(ctxt);
11032 		    ctxt->checkIndex = 0;
11033 		} else if ((cur == '<') && (next == '!') &&
11034 		    (ctxt->input->cur[2] == '-') &&
11035 		    (ctxt->input->cur[3] == '-')) {
11036 		    if ((!terminate) &&
11037 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11038 			goto done;
11039 #ifdef DEBUG_PUSH
11040 		    xmlGenericError(xmlGenericErrorContext,
11041 			    "PP: Parsing Comment\n");
11042 #endif
11043 		    xmlParseComment(ctxt);
11044 		    ctxt->instate = XML_PARSER_MISC;
11045 		    ctxt->checkIndex = 0;
11046 		} else if ((cur == '<') && (next == '!') &&
11047 		    (ctxt->input->cur[2] == 'D') &&
11048 		    (ctxt->input->cur[3] == 'O') &&
11049 		    (ctxt->input->cur[4] == 'C') &&
11050 		    (ctxt->input->cur[5] == 'T') &&
11051 		    (ctxt->input->cur[6] == 'Y') &&
11052 		    (ctxt->input->cur[7] == 'P') &&
11053 		    (ctxt->input->cur[8] == 'E')) {
11054 		    if ((!terminate) &&
11055 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11056 			goto done;
11057 #ifdef DEBUG_PUSH
11058 		    xmlGenericError(xmlGenericErrorContext,
11059 			    "PP: Parsing internal subset\n");
11060 #endif
11061 		    ctxt->inSubset = 1;
11062 		    xmlParseDocTypeDecl(ctxt);
11063 		    if (RAW == '[') {
11064 			ctxt->instate = XML_PARSER_DTD;
11065 #ifdef DEBUG_PUSH
11066 			xmlGenericError(xmlGenericErrorContext,
11067 				"PP: entering DTD\n");
11068 #endif
11069 		    } else {
11070 			/*
11071 			 * Create and update the external subset.
11072 			 */
11073 			ctxt->inSubset = 2;
11074 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11075 			    (ctxt->sax->externalSubset != NULL))
11076 			    ctxt->sax->externalSubset(ctxt->userData,
11077 				    ctxt->intSubName, ctxt->extSubSystem,
11078 				    ctxt->extSubURI);
11079 			ctxt->inSubset = 0;
11080 			xmlCleanSpecialAttr(ctxt);
11081 			ctxt->instate = XML_PARSER_PROLOG;
11082 #ifdef DEBUG_PUSH
11083 			xmlGenericError(xmlGenericErrorContext,
11084 				"PP: entering PROLOG\n");
11085 #endif
11086 		    }
11087 		} else if ((cur == '<') && (next == '!') &&
11088 		           (avail < 9)) {
11089 		    goto done;
11090 		} else {
11091 		    ctxt->instate = XML_PARSER_START_TAG;
11092 		    ctxt->progressive = 1;
11093 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11094 #ifdef DEBUG_PUSH
11095 		    xmlGenericError(xmlGenericErrorContext,
11096 			    "PP: entering START_TAG\n");
11097 #endif
11098 		}
11099 		break;
11100             case XML_PARSER_PROLOG:
11101 		SKIP_BLANKS;
11102 		if (ctxt->input->buf == NULL)
11103 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11104 		else
11105 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11106 		if (avail < 2)
11107 		    goto done;
11108 		cur = ctxt->input->cur[0];
11109 		next = ctxt->input->cur[1];
11110 	        if ((cur == '<') && (next == '?')) {
11111 		    if ((!terminate) &&
11112 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11113 			goto done;
11114 #ifdef DEBUG_PUSH
11115 		    xmlGenericError(xmlGenericErrorContext,
11116 			    "PP: Parsing PI\n");
11117 #endif
11118 		    xmlParsePI(ctxt);
11119 		} else if ((cur == '<') && (next == '!') &&
11120 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11121 		    if ((!terminate) &&
11122 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11123 			goto done;
11124 #ifdef DEBUG_PUSH
11125 		    xmlGenericError(xmlGenericErrorContext,
11126 			    "PP: Parsing Comment\n");
11127 #endif
11128 		    xmlParseComment(ctxt);
11129 		    ctxt->instate = XML_PARSER_PROLOG;
11130 		} else if ((cur == '<') && (next == '!') &&
11131 		           (avail < 4)) {
11132 		    goto done;
11133 		} else {
11134 		    ctxt->instate = XML_PARSER_START_TAG;
11135 		    if (ctxt->progressive == 0)
11136 			ctxt->progressive = 1;
11137 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11138 #ifdef DEBUG_PUSH
11139 		    xmlGenericError(xmlGenericErrorContext,
11140 			    "PP: entering START_TAG\n");
11141 #endif
11142 		}
11143 		break;
11144             case XML_PARSER_EPILOG:
11145 		SKIP_BLANKS;
11146 		if (ctxt->input->buf == NULL)
11147 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11148 		else
11149 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11150 		if (avail < 2)
11151 		    goto done;
11152 		cur = ctxt->input->cur[0];
11153 		next = ctxt->input->cur[1];
11154 	        if ((cur == '<') && (next == '?')) {
11155 		    if ((!terminate) &&
11156 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11157 			goto done;
11158 #ifdef DEBUG_PUSH
11159 		    xmlGenericError(xmlGenericErrorContext,
11160 			    "PP: Parsing PI\n");
11161 #endif
11162 		    xmlParsePI(ctxt);
11163 		    ctxt->instate = XML_PARSER_EPILOG;
11164 		} else if ((cur == '<') && (next == '!') &&
11165 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11166 		    if ((!terminate) &&
11167 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11168 			goto done;
11169 #ifdef DEBUG_PUSH
11170 		    xmlGenericError(xmlGenericErrorContext,
11171 			    "PP: Parsing Comment\n");
11172 #endif
11173 		    xmlParseComment(ctxt);
11174 		    ctxt->instate = XML_PARSER_EPILOG;
11175 		} else if ((cur == '<') && (next == '!') &&
11176 		           (avail < 4)) {
11177 		    goto done;
11178 		} else {
11179 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11180 		    ctxt->instate = XML_PARSER_EOF;
11181 #ifdef DEBUG_PUSH
11182 		    xmlGenericError(xmlGenericErrorContext,
11183 			    "PP: entering EOF\n");
11184 #endif
11185 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11186 			ctxt->sax->endDocument(ctxt->userData);
11187 		    goto done;
11188 		}
11189 		break;
11190             case XML_PARSER_DTD: {
11191 	        /*
11192 		 * Sorry but progressive parsing of the internal subset
11193 		 * is not expected to be supported. We first check that
11194 		 * the full content of the internal subset is available and
11195 		 * the parsing is launched only at that point.
11196 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11197 		 * section and not in a ']]>' sequence which are conditional
11198 		 * sections (whoever argued to keep that crap in XML deserve
11199 		 * a place in hell !).
11200 		 */
11201 		int base, i;
11202 		xmlChar *buf;
11203 	        xmlChar quote = 0;
11204 
11205 		base = ctxt->input->cur - ctxt->input->base;
11206 		if (base < 0) return(0);
11207 		if (ctxt->checkIndex > base)
11208 		    base = ctxt->checkIndex;
11209 		buf = ctxt->input->buf->buffer->content;
11210 		for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11211 		     base++) {
11212 		    if (quote != 0) {
11213 		        if (buf[base] == quote)
11214 			    quote = 0;
11215 			continue;
11216 		    }
11217 		    if ((quote == 0) && (buf[base] == '<')) {
11218 		        int found  = 0;
11219 			/* special handling of comments */
11220 		        if (((unsigned int) base + 4 <
11221 			     ctxt->input->buf->buffer->use) &&
11222 			    (buf[base + 1] == '!') &&
11223 			    (buf[base + 2] == '-') &&
11224 			    (buf[base + 3] == '-')) {
11225 			    for (;(unsigned int) base + 3 <
11226 			          ctxt->input->buf->buffer->use; base++) {
11227 				if ((buf[base] == '-') &&
11228 				    (buf[base + 1] == '-') &&
11229 				    (buf[base + 2] == '>')) {
11230 				    found = 1;
11231 				    base += 2;
11232 				    break;
11233 				}
11234 		            }
11235 			    if (!found) {
11236 #if 0
11237 			        fprintf(stderr, "unfinished comment\n");
11238 #endif
11239 			        break; /* for */
11240 		            }
11241 		            continue;
11242 			}
11243 		    }
11244 		    if (buf[base] == '"') {
11245 		        quote = '"';
11246 			continue;
11247 		    }
11248 		    if (buf[base] == '\'') {
11249 		        quote = '\'';
11250 			continue;
11251 		    }
11252 		    if (buf[base] == ']') {
11253 #if 0
11254 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11255 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11256 #endif
11257 		        if ((unsigned int) base +1 >=
11258 		            ctxt->input->buf->buffer->use)
11259 			    break;
11260 			if (buf[base + 1] == ']') {
11261 			    /* conditional crap, skip both ']' ! */
11262 			    base++;
11263 			    continue;
11264 			}
11265 		        for (i = 1;
11266 		     (unsigned int) base + i < ctxt->input->buf->buffer->use;
11267 		             i++) {
11268 			    if (buf[base + i] == '>') {
11269 #if 0
11270 			        fprintf(stderr, "found\n");
11271 #endif
11272 			        goto found_end_int_subset;
11273 			    }
11274 			    if (!IS_BLANK_CH(buf[base + i])) {
11275 #if 0
11276 			        fprintf(stderr, "not found\n");
11277 #endif
11278 			        goto not_end_of_int_subset;
11279 			    }
11280 			}
11281 #if 0
11282 			fprintf(stderr, "end of stream\n");
11283 #endif
11284 		        break;
11285 
11286 		    }
11287 not_end_of_int_subset:
11288                     continue; /* for */
11289 		}
11290 		/*
11291 		 * We didn't found the end of the Internal subset
11292 		 */
11293 #ifdef DEBUG_PUSH
11294 		if (next == 0)
11295 		    xmlGenericError(xmlGenericErrorContext,
11296 			    "PP: lookup of int subset end filed\n");
11297 #endif
11298 	        goto done;
11299 
11300 found_end_int_subset:
11301 		xmlParseInternalSubset(ctxt);
11302 		ctxt->inSubset = 2;
11303 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11304 		    (ctxt->sax->externalSubset != NULL))
11305 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11306 			    ctxt->extSubSystem, ctxt->extSubURI);
11307 		ctxt->inSubset = 0;
11308 		xmlCleanSpecialAttr(ctxt);
11309 		ctxt->instate = XML_PARSER_PROLOG;
11310 		ctxt->checkIndex = 0;
11311 #ifdef DEBUG_PUSH
11312 		xmlGenericError(xmlGenericErrorContext,
11313 			"PP: entering PROLOG\n");
11314 #endif
11315                 break;
11316 	    }
11317             case XML_PARSER_COMMENT:
11318 		xmlGenericError(xmlGenericErrorContext,
11319 			"PP: internal error, state == COMMENT\n");
11320 		ctxt->instate = XML_PARSER_CONTENT;
11321 #ifdef DEBUG_PUSH
11322 		xmlGenericError(xmlGenericErrorContext,
11323 			"PP: entering CONTENT\n");
11324 #endif
11325 		break;
11326             case XML_PARSER_IGNORE:
11327 		xmlGenericError(xmlGenericErrorContext,
11328 			"PP: internal error, state == IGNORE");
11329 	        ctxt->instate = XML_PARSER_DTD;
11330 #ifdef DEBUG_PUSH
11331 		xmlGenericError(xmlGenericErrorContext,
11332 			"PP: entering DTD\n");
11333 #endif
11334 	        break;
11335             case XML_PARSER_PI:
11336 		xmlGenericError(xmlGenericErrorContext,
11337 			"PP: internal error, state == PI\n");
11338 		ctxt->instate = XML_PARSER_CONTENT;
11339 #ifdef DEBUG_PUSH
11340 		xmlGenericError(xmlGenericErrorContext,
11341 			"PP: entering CONTENT\n");
11342 #endif
11343 		break;
11344             case XML_PARSER_ENTITY_DECL:
11345 		xmlGenericError(xmlGenericErrorContext,
11346 			"PP: internal error, state == ENTITY_DECL\n");
11347 		ctxt->instate = XML_PARSER_DTD;
11348 #ifdef DEBUG_PUSH
11349 		xmlGenericError(xmlGenericErrorContext,
11350 			"PP: entering DTD\n");
11351 #endif
11352 		break;
11353             case XML_PARSER_ENTITY_VALUE:
11354 		xmlGenericError(xmlGenericErrorContext,
11355 			"PP: internal error, state == ENTITY_VALUE\n");
11356 		ctxt->instate = XML_PARSER_CONTENT;
11357 #ifdef DEBUG_PUSH
11358 		xmlGenericError(xmlGenericErrorContext,
11359 			"PP: entering DTD\n");
11360 #endif
11361 		break;
11362             case XML_PARSER_ATTRIBUTE_VALUE:
11363 		xmlGenericError(xmlGenericErrorContext,
11364 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
11365 		ctxt->instate = XML_PARSER_START_TAG;
11366 #ifdef DEBUG_PUSH
11367 		xmlGenericError(xmlGenericErrorContext,
11368 			"PP: entering START_TAG\n");
11369 #endif
11370 		break;
11371             case XML_PARSER_SYSTEM_LITERAL:
11372 		xmlGenericError(xmlGenericErrorContext,
11373 			"PP: internal error, state == SYSTEM_LITERAL\n");
11374 		ctxt->instate = XML_PARSER_START_TAG;
11375 #ifdef DEBUG_PUSH
11376 		xmlGenericError(xmlGenericErrorContext,
11377 			"PP: entering START_TAG\n");
11378 #endif
11379 		break;
11380             case XML_PARSER_PUBLIC_LITERAL:
11381 		xmlGenericError(xmlGenericErrorContext,
11382 			"PP: internal error, state == PUBLIC_LITERAL\n");
11383 		ctxt->instate = XML_PARSER_START_TAG;
11384 #ifdef DEBUG_PUSH
11385 		xmlGenericError(xmlGenericErrorContext,
11386 			"PP: entering START_TAG\n");
11387 #endif
11388 		break;
11389 	}
11390     }
11391 done:
11392 #ifdef DEBUG_PUSH
11393     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11394 #endif
11395     return(ret);
11396 encoding_error:
11397     {
11398         char buffer[150];
11399 
11400 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11401 			ctxt->input->cur[0], ctxt->input->cur[1],
11402 			ctxt->input->cur[2], ctxt->input->cur[3]);
11403 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11404 		     "Input is not proper UTF-8, indicate encoding !\n%s",
11405 		     BAD_CAST buffer, NULL);
11406     }
11407     return(0);
11408 }
11409 
11410 /**
11411  * xmlParseChunk:
11412  * @ctxt:  an XML parser context
11413  * @chunk:  an char array
11414  * @size:  the size in byte of the chunk
11415  * @terminate:  last chunk indicator
11416  *
11417  * Parse a Chunk of memory
11418  *
11419  * Returns zero if no error, the xmlParserErrors otherwise.
11420  */
11421 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11422 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11423               int terminate) {
11424     int end_in_lf = 0;
11425 
11426     if (ctxt == NULL)
11427         return(XML_ERR_INTERNAL_ERROR);
11428     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11429         return(ctxt->errNo);
11430     if (ctxt->instate == XML_PARSER_START)
11431         xmlDetectSAX2(ctxt);
11432     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11433         (chunk[size - 1] == '\r')) {
11434 	end_in_lf = 1;
11435 	size--;
11436     }
11437     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11438         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11439 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11440 	int cur = ctxt->input->cur - ctxt->input->base;
11441 	int res;
11442 
11443 	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11444 	if (res < 0) {
11445 	    ctxt->errNo = XML_PARSER_EOF;
11446 	    ctxt->disableSAX = 1;
11447 	    return (XML_PARSER_EOF);
11448 	}
11449 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11450 	ctxt->input->cur = ctxt->input->base + cur;
11451 	ctxt->input->end =
11452 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11453 #ifdef DEBUG_PUSH
11454 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11455 #endif
11456 
11457     } else if (ctxt->instate != XML_PARSER_EOF) {
11458 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11459 	    xmlParserInputBufferPtr in = ctxt->input->buf;
11460 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
11461 		    (in->raw != NULL)) {
11462 		int nbchars;
11463 
11464 		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11465 		if (nbchars < 0) {
11466 		    /* TODO 2.6.0 */
11467 		    xmlGenericError(xmlGenericErrorContext,
11468 				    "xmlParseChunk: encoder error\n");
11469 		    return(XML_ERR_INVALID_ENCODING);
11470 		}
11471 	    }
11472 	}
11473     }
11474     xmlParseTryOrFinish(ctxt, terminate);
11475     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11476         (ctxt->input->buf != NULL)) {
11477 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11478     }
11479     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11480         return(ctxt->errNo);
11481     if (terminate) {
11482 	/*
11483 	 * Check for termination
11484 	 */
11485 	int avail = 0;
11486 
11487 	if (ctxt->input != NULL) {
11488 	    if (ctxt->input->buf == NULL)
11489 		avail = ctxt->input->length -
11490 			(ctxt->input->cur - ctxt->input->base);
11491 	    else
11492 		avail = ctxt->input->buf->buffer->use -
11493 			(ctxt->input->cur - ctxt->input->base);
11494 	}
11495 
11496 	if ((ctxt->instate != XML_PARSER_EOF) &&
11497 	    (ctxt->instate != XML_PARSER_EPILOG)) {
11498 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11499 	}
11500 	if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11501 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11502 	}
11503 	if (ctxt->instate != XML_PARSER_EOF) {
11504 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11505 		ctxt->sax->endDocument(ctxt->userData);
11506 	}
11507 	ctxt->instate = XML_PARSER_EOF;
11508     }
11509     return((xmlParserErrors) ctxt->errNo);
11510 }
11511 
11512 /************************************************************************
11513  *									*
11514  * 		I/O front end functions to the parser			*
11515  *									*
11516  ************************************************************************/
11517 
11518 /**
11519  * xmlCreatePushParserCtxt:
11520  * @sax:  a SAX handler
11521  * @user_data:  The user data returned on SAX callbacks
11522  * @chunk:  a pointer to an array of chars
11523  * @size:  number of chars in the array
11524  * @filename:  an optional file name or URI
11525  *
11526  * Create a parser context for using the XML parser in push mode.
11527  * If @chunk is non-NULL and @size is non-zero, the data is used to detect
11528  * the encoding.  The remaining characters will be parsed so they
11529  * don't need to be fed in again through xmlParseChunk.
11530  * To allow content encoding detection, @size should be >= 4
11531  * The value of @filename is used for fetching external entities
11532  * and error/warning reports.
11533  *
11534  * Returns the new parser context or NULL
11535  */
11536 
11537 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11538 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11539                         const char *chunk, int size, const char *filename) {
11540     xmlParserCtxtPtr ctxt;
11541     xmlParserInputPtr inputStream;
11542     xmlParserInputBufferPtr buf;
11543     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11544 
11545     /*
11546      * plug some encoding conversion routines
11547      */
11548     if ((chunk != NULL) && (size >= 4))
11549 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11550 
11551     buf = xmlAllocParserInputBuffer(enc);
11552     if (buf == NULL) return(NULL);
11553 
11554     ctxt = xmlNewParserCtxt();
11555     if (ctxt == NULL) {
11556         xmlErrMemory(NULL, "creating parser: out of memory\n");
11557 	xmlFreeParserInputBuffer(buf);
11558 	return(NULL);
11559     }
11560     ctxt->dictNames = 1;
11561     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11562     if (ctxt->pushTab == NULL) {
11563         xmlErrMemory(ctxt, NULL);
11564 	xmlFreeParserInputBuffer(buf);
11565 	xmlFreeParserCtxt(ctxt);
11566 	return(NULL);
11567     }
11568     if (sax != NULL) {
11569 #ifdef LIBXML_SAX1_ENABLED
11570 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11571 #endif /* LIBXML_SAX1_ENABLED */
11572 	    xmlFree(ctxt->sax);
11573 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11574 	if (ctxt->sax == NULL) {
11575 	    xmlErrMemory(ctxt, NULL);
11576 	    xmlFreeParserInputBuffer(buf);
11577 	    xmlFreeParserCtxt(ctxt);
11578 	    return(NULL);
11579 	}
11580 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11581 	if (sax->initialized == XML_SAX2_MAGIC)
11582 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11583 	else
11584 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11585 	if (user_data != NULL)
11586 	    ctxt->userData = user_data;
11587     }
11588     if (filename == NULL) {
11589 	ctxt->directory = NULL;
11590     } else {
11591         ctxt->directory = xmlParserGetDirectory(filename);
11592     }
11593 
11594     inputStream = xmlNewInputStream(ctxt);
11595     if (inputStream == NULL) {
11596 	xmlFreeParserCtxt(ctxt);
11597 	xmlFreeParserInputBuffer(buf);
11598 	return(NULL);
11599     }
11600 
11601     if (filename == NULL)
11602 	inputStream->filename = NULL;
11603     else {
11604 	inputStream->filename = (char *)
11605 	    xmlCanonicPath((const xmlChar *) filename);
11606 	if (inputStream->filename == NULL) {
11607 	    xmlFreeParserCtxt(ctxt);
11608 	    xmlFreeParserInputBuffer(buf);
11609 	    return(NULL);
11610 	}
11611     }
11612     inputStream->buf = buf;
11613     inputStream->base = inputStream->buf->buffer->content;
11614     inputStream->cur = inputStream->buf->buffer->content;
11615     inputStream->end =
11616 	&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11617 
11618     inputPush(ctxt, inputStream);
11619 
11620     /*
11621      * If the caller didn't provide an initial 'chunk' for determining
11622      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11623      * that it can be automatically determined later
11624      */
11625     if ((size == 0) || (chunk == NULL)) {
11626 	ctxt->charset = XML_CHAR_ENCODING_NONE;
11627     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11628 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11629 	int cur = ctxt->input->cur - ctxt->input->base;
11630 
11631 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11632 
11633 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11634 	ctxt->input->cur = ctxt->input->base + cur;
11635 	ctxt->input->end =
11636 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11637 #ifdef DEBUG_PUSH
11638 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11639 #endif
11640     }
11641 
11642     if (enc != XML_CHAR_ENCODING_NONE) {
11643         xmlSwitchEncoding(ctxt, enc);
11644     }
11645 
11646     return(ctxt);
11647 }
11648 #endif /* LIBXML_PUSH_ENABLED */
11649 
11650 /**
11651  * xmlStopParser:
11652  * @ctxt:  an XML parser context
11653  *
11654  * Blocks further parser processing
11655  */
11656 void
xmlStopParser(xmlParserCtxtPtr ctxt)11657 xmlStopParser(xmlParserCtxtPtr ctxt) {
11658     if (ctxt == NULL)
11659         return;
11660     ctxt->instate = XML_PARSER_EOF;
11661     ctxt->disableSAX = 1;
11662     if (ctxt->input != NULL) {
11663 	ctxt->input->cur = BAD_CAST"";
11664 	ctxt->input->base = ctxt->input->cur;
11665     }
11666 }
11667 
11668 /**
11669  * xmlCreateIOParserCtxt:
11670  * @sax:  a SAX handler
11671  * @user_data:  The user data returned on SAX callbacks
11672  * @ioread:  an I/O read function
11673  * @ioclose:  an I/O close function
11674  * @ioctx:  an I/O handler
11675  * @enc:  the charset encoding if known
11676  *
11677  * Create a parser context for using the XML parser with an existing
11678  * I/O stream
11679  *
11680  * Returns the new parser context or NULL
11681  */
11682 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11683 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11684 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
11685 	void *ioctx, xmlCharEncoding enc) {
11686     xmlParserCtxtPtr ctxt;
11687     xmlParserInputPtr inputStream;
11688     xmlParserInputBufferPtr buf;
11689 
11690     if (ioread == NULL) return(NULL);
11691 
11692     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11693     if (buf == NULL) return(NULL);
11694 
11695     ctxt = xmlNewParserCtxt();
11696     if (ctxt == NULL) {
11697 	xmlFreeParserInputBuffer(buf);
11698 	return(NULL);
11699     }
11700     if (sax != NULL) {
11701 #ifdef LIBXML_SAX1_ENABLED
11702 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11703 #endif /* LIBXML_SAX1_ENABLED */
11704 	    xmlFree(ctxt->sax);
11705 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11706 	if (ctxt->sax == NULL) {
11707 	    xmlErrMemory(ctxt, NULL);
11708 	    xmlFreeParserCtxt(ctxt);
11709 	    return(NULL);
11710 	}
11711 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11712 	if (sax->initialized == XML_SAX2_MAGIC)
11713 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11714 	else
11715 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11716 	if (user_data != NULL)
11717 	    ctxt->userData = user_data;
11718     }
11719 
11720     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11721     if (inputStream == NULL) {
11722 	xmlFreeParserCtxt(ctxt);
11723 	return(NULL);
11724     }
11725     inputPush(ctxt, inputStream);
11726 
11727     return(ctxt);
11728 }
11729 
11730 #ifdef LIBXML_VALID_ENABLED
11731 /************************************************************************
11732  *									*
11733  * 		Front ends when parsing a DTD				*
11734  *									*
11735  ************************************************************************/
11736 
11737 /**
11738  * xmlIOParseDTD:
11739  * @sax:  the SAX handler block or NULL
11740  * @input:  an Input Buffer
11741  * @enc:  the charset encoding if known
11742  *
11743  * Load and parse a DTD
11744  *
11745  * Returns the resulting xmlDtdPtr or NULL in case of error.
11746  * @input will be freed by the function in any case.
11747  */
11748 
11749 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11750 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11751 	      xmlCharEncoding enc) {
11752     xmlDtdPtr ret = NULL;
11753     xmlParserCtxtPtr ctxt;
11754     xmlParserInputPtr pinput = NULL;
11755     xmlChar start[4];
11756 
11757     if (input == NULL)
11758 	return(NULL);
11759 
11760     ctxt = xmlNewParserCtxt();
11761     if (ctxt == NULL) {
11762         xmlFreeParserInputBuffer(input);
11763 	return(NULL);
11764     }
11765 
11766     /*
11767      * Set-up the SAX context
11768      */
11769     if (sax != NULL) {
11770 	if (ctxt->sax != NULL)
11771 	    xmlFree(ctxt->sax);
11772         ctxt->sax = sax;
11773         ctxt->userData = ctxt;
11774     }
11775     xmlDetectSAX2(ctxt);
11776 
11777     /*
11778      * generate a parser input from the I/O handler
11779      */
11780 
11781     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11782     if (pinput == NULL) {
11783         if (sax != NULL) ctxt->sax = NULL;
11784         xmlFreeParserInputBuffer(input);
11785 	xmlFreeParserCtxt(ctxt);
11786 	return(NULL);
11787     }
11788 
11789     /*
11790      * plug some encoding conversion routines here.
11791      */
11792     if (xmlPushInput(ctxt, pinput) < 0) {
11793         if (sax != NULL) ctxt->sax = NULL;
11794 	xmlFreeParserCtxt(ctxt);
11795 	return(NULL);
11796     }
11797     if (enc != XML_CHAR_ENCODING_NONE) {
11798         xmlSwitchEncoding(ctxt, enc);
11799     }
11800 
11801     pinput->filename = NULL;
11802     pinput->line = 1;
11803     pinput->col = 1;
11804     pinput->base = ctxt->input->cur;
11805     pinput->cur = ctxt->input->cur;
11806     pinput->free = NULL;
11807 
11808     /*
11809      * let's parse that entity knowing it's an external subset.
11810      */
11811     ctxt->inSubset = 2;
11812     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11813     if (ctxt->myDoc == NULL) {
11814 	xmlErrMemory(ctxt, "New Doc failed");
11815 	return(NULL);
11816     }
11817     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11818     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11819 	                               BAD_CAST "none", BAD_CAST "none");
11820 
11821     if ((enc == XML_CHAR_ENCODING_NONE) &&
11822         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11823 	/*
11824 	 * Get the 4 first bytes and decode the charset
11825 	 * if enc != XML_CHAR_ENCODING_NONE
11826 	 * plug some encoding conversion routines.
11827 	 */
11828 	start[0] = RAW;
11829 	start[1] = NXT(1);
11830 	start[2] = NXT(2);
11831 	start[3] = NXT(3);
11832 	enc = xmlDetectCharEncoding(start, 4);
11833 	if (enc != XML_CHAR_ENCODING_NONE) {
11834 	    xmlSwitchEncoding(ctxt, enc);
11835 	}
11836     }
11837 
11838     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11839 
11840     if (ctxt->myDoc != NULL) {
11841 	if (ctxt->wellFormed) {
11842 	    ret = ctxt->myDoc->extSubset;
11843 	    ctxt->myDoc->extSubset = NULL;
11844 	    if (ret != NULL) {
11845 		xmlNodePtr tmp;
11846 
11847 		ret->doc = NULL;
11848 		tmp = ret->children;
11849 		while (tmp != NULL) {
11850 		    tmp->doc = NULL;
11851 		    tmp = tmp->next;
11852 		}
11853 	    }
11854 	} else {
11855 	    ret = NULL;
11856 	}
11857         xmlFreeDoc(ctxt->myDoc);
11858         ctxt->myDoc = NULL;
11859     }
11860     if (sax != NULL) ctxt->sax = NULL;
11861     xmlFreeParserCtxt(ctxt);
11862 
11863     return(ret);
11864 }
11865 
11866 /**
11867  * xmlSAXParseDTD:
11868  * @sax:  the SAX handler block
11869  * @ExternalID:  a NAME* containing the External ID of the DTD
11870  * @SystemID:  a NAME* containing the URL to the DTD
11871  *
11872  * Load and parse an external subset.
11873  *
11874  * Returns the resulting xmlDtdPtr or NULL in case of error.
11875  */
11876 
11877 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11878 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11879                           const xmlChar *SystemID) {
11880     xmlDtdPtr ret = NULL;
11881     xmlParserCtxtPtr ctxt;
11882     xmlParserInputPtr input = NULL;
11883     xmlCharEncoding enc;
11884     xmlChar* systemIdCanonic;
11885 
11886     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11887 
11888     ctxt = xmlNewParserCtxt();
11889     if (ctxt == NULL) {
11890 	return(NULL);
11891     }
11892 
11893     /*
11894      * Set-up the SAX context
11895      */
11896     if (sax != NULL) {
11897 	if (ctxt->sax != NULL)
11898 	    xmlFree(ctxt->sax);
11899         ctxt->sax = sax;
11900         ctxt->userData = ctxt;
11901     }
11902 
11903     /*
11904      * Canonicalise the system ID
11905      */
11906     systemIdCanonic = xmlCanonicPath(SystemID);
11907     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11908 	xmlFreeParserCtxt(ctxt);
11909 	return(NULL);
11910     }
11911 
11912     /*
11913      * Ask the Entity resolver to load the damn thing
11914      */
11915 
11916     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11917 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11918 	                                 systemIdCanonic);
11919     if (input == NULL) {
11920         if (sax != NULL) ctxt->sax = NULL;
11921 	xmlFreeParserCtxt(ctxt);
11922 	if (systemIdCanonic != NULL)
11923 	    xmlFree(systemIdCanonic);
11924 	return(NULL);
11925     }
11926 
11927     /*
11928      * plug some encoding conversion routines here.
11929      */
11930     if (xmlPushInput(ctxt, input) < 0) {
11931         if (sax != NULL) ctxt->sax = NULL;
11932 	xmlFreeParserCtxt(ctxt);
11933 	if (systemIdCanonic != NULL)
11934 	    xmlFree(systemIdCanonic);
11935 	return(NULL);
11936     }
11937     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11938 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11939 	xmlSwitchEncoding(ctxt, enc);
11940     }
11941 
11942     if (input->filename == NULL)
11943 	input->filename = (char *) systemIdCanonic;
11944     else
11945 	xmlFree(systemIdCanonic);
11946     input->line = 1;
11947     input->col = 1;
11948     input->base = ctxt->input->cur;
11949     input->cur = ctxt->input->cur;
11950     input->free = NULL;
11951 
11952     /*
11953      * let's parse that entity knowing it's an external subset.
11954      */
11955     ctxt->inSubset = 2;
11956     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11957     if (ctxt->myDoc == NULL) {
11958 	xmlErrMemory(ctxt, "New Doc failed");
11959         if (sax != NULL) ctxt->sax = NULL;
11960 	xmlFreeParserCtxt(ctxt);
11961 	return(NULL);
11962     }
11963     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11964     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11965 	                               ExternalID, SystemID);
11966     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11967 
11968     if (ctxt->myDoc != NULL) {
11969 	if (ctxt->wellFormed) {
11970 	    ret = ctxt->myDoc->extSubset;
11971 	    ctxt->myDoc->extSubset = NULL;
11972 	    if (ret != NULL) {
11973 		xmlNodePtr tmp;
11974 
11975 		ret->doc = NULL;
11976 		tmp = ret->children;
11977 		while (tmp != NULL) {
11978 		    tmp->doc = NULL;
11979 		    tmp = tmp->next;
11980 		}
11981 	    }
11982 	} else {
11983 	    ret = NULL;
11984 	}
11985         xmlFreeDoc(ctxt->myDoc);
11986         ctxt->myDoc = NULL;
11987     }
11988     if (sax != NULL) ctxt->sax = NULL;
11989     xmlFreeParserCtxt(ctxt);
11990 
11991     return(ret);
11992 }
11993 
11994 
11995 /**
11996  * xmlParseDTD:
11997  * @ExternalID:  a NAME* containing the External ID of the DTD
11998  * @SystemID:  a NAME* containing the URL to the DTD
11999  *
12000  * Load and parse an external subset.
12001  *
12002  * Returns the resulting xmlDtdPtr or NULL in case of error.
12003  */
12004 
12005 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12006 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12007     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12008 }
12009 #endif /* LIBXML_VALID_ENABLED */
12010 
12011 /************************************************************************
12012  *									*
12013  * 		Front ends when parsing an Entity			*
12014  *									*
12015  ************************************************************************/
12016 
12017 /**
12018  * xmlParseCtxtExternalEntity:
12019  * @ctx:  the existing parsing context
12020  * @URL:  the URL for the entity to load
12021  * @ID:  the System ID for the entity to load
12022  * @lst:  the return value for the set of parsed nodes
12023  *
12024  * Parse an external general entity within an existing parsing context
12025  * An external general parsed entity is well-formed if it matches the
12026  * production labeled extParsedEnt.
12027  *
12028  * [78] extParsedEnt ::= TextDecl? content
12029  *
12030  * Returns 0 if the entity is well formed, -1 in case of args problem and
12031  *    the parser error code otherwise
12032  */
12033 
12034 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12035 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12036 	               const xmlChar *ID, xmlNodePtr *lst) {
12037     xmlParserCtxtPtr ctxt;
12038     xmlDocPtr newDoc;
12039     xmlNodePtr newRoot;
12040     xmlSAXHandlerPtr oldsax = NULL;
12041     int ret = 0;
12042     xmlChar start[4];
12043     xmlCharEncoding enc;
12044     xmlParserInputPtr inputStream;
12045     char *directory = NULL;
12046 
12047     if (ctx == NULL) return(-1);
12048 
12049     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12050         (ctx->depth > 1024)) {
12051 	return(XML_ERR_ENTITY_LOOP);
12052     }
12053 
12054     if (lst != NULL)
12055         *lst = NULL;
12056     if ((URL == NULL) && (ID == NULL))
12057 	return(-1);
12058     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12059 	return(-1);
12060 
12061     ctxt = xmlNewParserCtxt();
12062     if (ctxt == NULL) {
12063 	return(-1);
12064     }
12065 
12066     ctxt->userData = ctxt;
12067     ctxt->_private = ctx->_private;
12068 
12069     inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12070     if (inputStream == NULL) {
12071 	xmlFreeParserCtxt(ctxt);
12072 	return(-1);
12073     }
12074 
12075     inputPush(ctxt, inputStream);
12076 
12077     if ((ctxt->directory == NULL) && (directory == NULL))
12078 	directory = xmlParserGetDirectory((char *)URL);
12079     if ((ctxt->directory == NULL) && (directory != NULL))
12080 	ctxt->directory = directory;
12081 
12082     oldsax = ctxt->sax;
12083     ctxt->sax = ctx->sax;
12084     xmlDetectSAX2(ctxt);
12085     newDoc = xmlNewDoc(BAD_CAST "1.0");
12086     if (newDoc == NULL) {
12087 	xmlFreeParserCtxt(ctxt);
12088 	return(-1);
12089     }
12090     newDoc->properties = XML_DOC_INTERNAL;
12091     if (ctx->myDoc->dict) {
12092 	newDoc->dict = ctx->myDoc->dict;
12093 	xmlDictReference(newDoc->dict);
12094     }
12095     if (ctx->myDoc != NULL) {
12096 	newDoc->intSubset = ctx->myDoc->intSubset;
12097 	newDoc->extSubset = ctx->myDoc->extSubset;
12098     }
12099     if (ctx->myDoc->URL != NULL) {
12100 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12101     }
12102     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12103     if (newRoot == NULL) {
12104 	ctxt->sax = oldsax;
12105 	xmlFreeParserCtxt(ctxt);
12106 	newDoc->intSubset = NULL;
12107 	newDoc->extSubset = NULL;
12108         xmlFreeDoc(newDoc);
12109 	return(-1);
12110     }
12111     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12112     nodePush(ctxt, newDoc->children);
12113     if (ctx->myDoc == NULL) {
12114 	ctxt->myDoc = newDoc;
12115     } else {
12116 	ctxt->myDoc = ctx->myDoc;
12117 	newDoc->children->doc = ctx->myDoc;
12118     }
12119 
12120     /*
12121      * Get the 4 first bytes and decode the charset
12122      * if enc != XML_CHAR_ENCODING_NONE
12123      * plug some encoding conversion routines.
12124      */
12125     GROW
12126     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12127 	start[0] = RAW;
12128 	start[1] = NXT(1);
12129 	start[2] = NXT(2);
12130 	start[3] = NXT(3);
12131 	enc = xmlDetectCharEncoding(start, 4);
12132 	if (enc != XML_CHAR_ENCODING_NONE) {
12133 	    xmlSwitchEncoding(ctxt, enc);
12134 	}
12135     }
12136 
12137     /*
12138      * Parse a possible text declaration first
12139      */
12140     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12141 	xmlParseTextDecl(ctxt);
12142 	/*
12143 	 * An XML-1.0 document can't reference an entity not XML-1.0
12144 	 */
12145 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12146 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12147 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12148 	                   "Version mismatch between document and entity\n");
12149 	}
12150     }
12151 
12152     /*
12153      * Doing validity checking on chunk doesn't make sense
12154      */
12155     ctxt->instate = XML_PARSER_CONTENT;
12156     ctxt->validate = ctx->validate;
12157     ctxt->valid = ctx->valid;
12158     ctxt->loadsubset = ctx->loadsubset;
12159     ctxt->depth = ctx->depth + 1;
12160     ctxt->replaceEntities = ctx->replaceEntities;
12161     if (ctxt->validate) {
12162 	ctxt->vctxt.error = ctx->vctxt.error;
12163 	ctxt->vctxt.warning = ctx->vctxt.warning;
12164     } else {
12165 	ctxt->vctxt.error = NULL;
12166 	ctxt->vctxt.warning = NULL;
12167     }
12168     ctxt->vctxt.nodeTab = NULL;
12169     ctxt->vctxt.nodeNr = 0;
12170     ctxt->vctxt.nodeMax = 0;
12171     ctxt->vctxt.node = NULL;
12172     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12173     ctxt->dict = ctx->dict;
12174     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12175     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12176     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12177     ctxt->dictNames = ctx->dictNames;
12178     ctxt->attsDefault = ctx->attsDefault;
12179     ctxt->attsSpecial = ctx->attsSpecial;
12180     ctxt->linenumbers = ctx->linenumbers;
12181 
12182     xmlParseContent(ctxt);
12183 
12184     ctx->validate = ctxt->validate;
12185     ctx->valid = ctxt->valid;
12186     if ((RAW == '<') && (NXT(1) == '/')) {
12187 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12188     } else if (RAW != 0) {
12189 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12190     }
12191     if (ctxt->node != newDoc->children) {
12192 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12193     }
12194 
12195     if (!ctxt->wellFormed) {
12196         if (ctxt->errNo == 0)
12197 	    ret = 1;
12198 	else
12199 	    ret = ctxt->errNo;
12200     } else {
12201 	if (lst != NULL) {
12202 	    xmlNodePtr cur;
12203 
12204 	    /*
12205 	     * Return the newly created nodeset after unlinking it from
12206 	     * they pseudo parent.
12207 	     */
12208 	    cur = newDoc->children->children;
12209 	    *lst = cur;
12210 	    while (cur != NULL) {
12211 		cur->parent = NULL;
12212 		cur = cur->next;
12213 	    }
12214             newDoc->children->children = NULL;
12215 	}
12216 	ret = 0;
12217     }
12218     ctxt->sax = oldsax;
12219     ctxt->dict = NULL;
12220     ctxt->attsDefault = NULL;
12221     ctxt->attsSpecial = NULL;
12222     xmlFreeParserCtxt(ctxt);
12223     newDoc->intSubset = NULL;
12224     newDoc->extSubset = NULL;
12225     xmlFreeDoc(newDoc);
12226 
12227     return(ret);
12228 }
12229 
12230 /**
12231  * xmlParseExternalEntityPrivate:
12232  * @doc:  the document the chunk pertains to
12233  * @oldctxt:  the previous parser context if available
12234  * @sax:  the SAX handler bloc (possibly NULL)
12235  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12236  * @depth:  Used for loop detection, use 0
12237  * @URL:  the URL for the entity to load
12238  * @ID:  the System ID for the entity to load
12239  * @list:  the return value for the set of parsed nodes
12240  *
12241  * Private version of xmlParseExternalEntity()
12242  *
12243  * Returns 0 if the entity is well formed, -1 in case of args problem and
12244  *    the parser error code otherwise
12245  */
12246 
12247 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12248 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12249 	              xmlSAXHandlerPtr sax,
12250 		      void *user_data, int depth, const xmlChar *URL,
12251 		      const xmlChar *ID, xmlNodePtr *list) {
12252     xmlParserCtxtPtr ctxt;
12253     xmlDocPtr newDoc;
12254     xmlNodePtr newRoot;
12255     xmlSAXHandlerPtr oldsax = NULL;
12256     xmlParserErrors ret = XML_ERR_OK;
12257     xmlChar start[4];
12258     xmlCharEncoding enc;
12259 
12260     if (((depth > 40) &&
12261 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12262 	(depth > 1024)) {
12263 	return(XML_ERR_ENTITY_LOOP);
12264     }
12265 
12266     if (list != NULL)
12267         *list = NULL;
12268     if ((URL == NULL) && (ID == NULL))
12269 	return(XML_ERR_INTERNAL_ERROR);
12270     if (doc == NULL)
12271 	return(XML_ERR_INTERNAL_ERROR);
12272 
12273 
12274     ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
12275     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12276     ctxt->userData = ctxt;
12277     if (oldctxt != NULL) {
12278 	ctxt->_private = oldctxt->_private;
12279 	ctxt->loadsubset = oldctxt->loadsubset;
12280 	ctxt->validate = oldctxt->validate;
12281 	ctxt->external = oldctxt->external;
12282 	ctxt->record_info = oldctxt->record_info;
12283 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12284 	ctxt->node_seq.length = oldctxt->node_seq.length;
12285 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12286     } else {
12287 	/*
12288 	 * Doing validity checking on chunk without context
12289 	 * doesn't make sense
12290 	 */
12291 	ctxt->_private = NULL;
12292 	ctxt->validate = 0;
12293 	ctxt->external = 2;
12294 	ctxt->loadsubset = 0;
12295     }
12296     if (sax != NULL) {
12297 	oldsax = ctxt->sax;
12298         ctxt->sax = sax;
12299 	if (user_data != NULL)
12300 	    ctxt->userData = user_data;
12301     }
12302     xmlDetectSAX2(ctxt);
12303     newDoc = xmlNewDoc(BAD_CAST "1.0");
12304     if (newDoc == NULL) {
12305 	ctxt->node_seq.maximum = 0;
12306 	ctxt->node_seq.length = 0;
12307 	ctxt->node_seq.buffer = NULL;
12308 	xmlFreeParserCtxt(ctxt);
12309 	return(XML_ERR_INTERNAL_ERROR);
12310     }
12311     newDoc->properties = XML_DOC_INTERNAL;
12312     newDoc->intSubset = doc->intSubset;
12313     newDoc->extSubset = doc->extSubset;
12314     newDoc->dict = doc->dict;
12315     xmlDictReference(newDoc->dict);
12316 
12317     if (doc->URL != NULL) {
12318 	newDoc->URL = xmlStrdup(doc->URL);
12319     }
12320     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12321     if (newRoot == NULL) {
12322 	if (sax != NULL)
12323 	    ctxt->sax = oldsax;
12324 	ctxt->node_seq.maximum = 0;
12325 	ctxt->node_seq.length = 0;
12326 	ctxt->node_seq.buffer = NULL;
12327 	xmlFreeParserCtxt(ctxt);
12328 	newDoc->intSubset = NULL;
12329 	newDoc->extSubset = NULL;
12330         xmlFreeDoc(newDoc);
12331 	return(XML_ERR_INTERNAL_ERROR);
12332     }
12333     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12334     nodePush(ctxt, newDoc->children);
12335     ctxt->myDoc = doc;
12336     newRoot->doc = doc;
12337 
12338     /*
12339      * Get the 4 first bytes and decode the charset
12340      * if enc != XML_CHAR_ENCODING_NONE
12341      * plug some encoding conversion routines.
12342      */
12343     GROW;
12344     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12345 	start[0] = RAW;
12346 	start[1] = NXT(1);
12347 	start[2] = NXT(2);
12348 	start[3] = NXT(3);
12349 	enc = xmlDetectCharEncoding(start, 4);
12350 	if (enc != XML_CHAR_ENCODING_NONE) {
12351 	    xmlSwitchEncoding(ctxt, enc);
12352 	}
12353     }
12354 
12355     /*
12356      * Parse a possible text declaration first
12357      */
12358     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12359 	xmlParseTextDecl(ctxt);
12360     }
12361 
12362     ctxt->instate = XML_PARSER_CONTENT;
12363     ctxt->depth = depth;
12364 
12365     xmlParseContent(ctxt);
12366 
12367     if ((RAW == '<') && (NXT(1) == '/')) {
12368 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12369     } else if (RAW != 0) {
12370 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12371     }
12372     if (ctxt->node != newDoc->children) {
12373 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12374     }
12375 
12376     if (!ctxt->wellFormed) {
12377         if (ctxt->errNo == 0)
12378 	    ret = XML_ERR_INTERNAL_ERROR;
12379 	else
12380 	    ret = (xmlParserErrors)ctxt->errNo;
12381     } else {
12382 	if (list != NULL) {
12383 	    xmlNodePtr cur;
12384 
12385 	    /*
12386 	     * Return the newly created nodeset after unlinking it from
12387 	     * they pseudo parent.
12388 	     */
12389 	    cur = newDoc->children->children;
12390 	    *list = cur;
12391 	    while (cur != NULL) {
12392 		cur->parent = NULL;
12393 		cur = cur->next;
12394 	    }
12395             newDoc->children->children = NULL;
12396 	}
12397 	ret = XML_ERR_OK;
12398     }
12399 
12400     /*
12401      * Record in the parent context the number of entities replacement
12402      * done when parsing that reference.
12403      */
12404     oldctxt->nbentities += ctxt->nbentities;
12405     /*
12406      * Also record the size of the entity parsed
12407      */
12408     if (ctxt->input != NULL) {
12409 	oldctxt->sizeentities += ctxt->input->consumed;
12410 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12411     }
12412     /*
12413      * And record the last error if any
12414      */
12415     if (ctxt->lastError.code != XML_ERR_OK)
12416         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12417 
12418     if (sax != NULL)
12419 	ctxt->sax = oldsax;
12420     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12421     oldctxt->node_seq.length = ctxt->node_seq.length;
12422     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12423     ctxt->node_seq.maximum = 0;
12424     ctxt->node_seq.length = 0;
12425     ctxt->node_seq.buffer = NULL;
12426     xmlFreeParserCtxt(ctxt);
12427     newDoc->intSubset = NULL;
12428     newDoc->extSubset = NULL;
12429     xmlFreeDoc(newDoc);
12430 
12431     return(ret);
12432 }
12433 
12434 #ifdef LIBXML_SAX1_ENABLED
12435 /**
12436  * xmlParseExternalEntity:
12437  * @doc:  the document the chunk pertains to
12438  * @sax:  the SAX handler bloc (possibly NULL)
12439  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12440  * @depth:  Used for loop detection, use 0
12441  * @URL:  the URL for the entity to load
12442  * @ID:  the System ID for the entity to load
12443  * @lst:  the return value for the set of parsed nodes
12444  *
12445  * Parse an external general entity
12446  * An external general parsed entity is well-formed if it matches the
12447  * production labeled extParsedEnt.
12448  *
12449  * [78] extParsedEnt ::= TextDecl? content
12450  *
12451  * Returns 0 if the entity is well formed, -1 in case of args problem and
12452  *    the parser error code otherwise
12453  */
12454 
12455 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12456 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12457 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12458     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12459 		                       ID, lst));
12460 }
12461 
12462 /**
12463  * xmlParseBalancedChunkMemory:
12464  * @doc:  the document the chunk pertains to
12465  * @sax:  the SAX handler bloc (possibly NULL)
12466  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12467  * @depth:  Used for loop detection, use 0
12468  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12469  * @lst:  the return value for the set of parsed nodes
12470  *
12471  * Parse a well-balanced chunk of an XML document
12472  * called by the parser
12473  * The allowed sequence for the Well Balanced Chunk is the one defined by
12474  * the content production in the XML grammar:
12475  *
12476  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12477  *
12478  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12479  *    the parser error code otherwise
12480  */
12481 
12482 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12483 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12484      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12485     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12486                                                 depth, string, lst, 0 );
12487 }
12488 #endif /* LIBXML_SAX1_ENABLED */
12489 
12490 /**
12491  * xmlParseBalancedChunkMemoryInternal:
12492  * @oldctxt:  the existing parsing context
12493  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12494  * @user_data:  the user data field for the parser context
12495  * @lst:  the return value for the set of parsed nodes
12496  *
12497  *
12498  * Parse a well-balanced chunk of an XML document
12499  * called by the parser
12500  * The allowed sequence for the Well Balanced Chunk is the one defined by
12501  * the content production in the XML grammar:
12502  *
12503  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12504  *
12505  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12506  * error code otherwise
12507  *
12508  * In case recover is set to 1, the nodelist will not be empty even if
12509  * the parsed chunk is not well balanced.
12510  */
12511 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12512 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12513 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12514     xmlParserCtxtPtr ctxt;
12515     xmlDocPtr newDoc = NULL;
12516     xmlNodePtr newRoot;
12517     xmlSAXHandlerPtr oldsax = NULL;
12518     xmlNodePtr content = NULL;
12519     xmlNodePtr last = NULL;
12520     int size;
12521     xmlParserErrors ret = XML_ERR_OK;
12522 
12523     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12524         (oldctxt->depth >  1024)) {
12525 	return(XML_ERR_ENTITY_LOOP);
12526     }
12527 
12528 
12529     if (lst != NULL)
12530         *lst = NULL;
12531     if (string == NULL)
12532         return(XML_ERR_INTERNAL_ERROR);
12533 
12534     size = xmlStrlen(string);
12535 
12536     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12537     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12538     if (user_data != NULL)
12539 	ctxt->userData = user_data;
12540     else
12541 	ctxt->userData = ctxt;
12542     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12543     ctxt->dict = oldctxt->dict;
12544     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12545     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12546     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12547 
12548     ctxt->nsParent = oldctxt;
12549 
12550     oldsax = ctxt->sax;
12551     ctxt->sax = oldctxt->sax;
12552     xmlDetectSAX2(ctxt);
12553     ctxt->replaceEntities = oldctxt->replaceEntities;
12554     ctxt->options = oldctxt->options;
12555 
12556     ctxt->_private = oldctxt->_private;
12557     if (oldctxt->myDoc == NULL) {
12558 	newDoc = xmlNewDoc(BAD_CAST "1.0");
12559 	if (newDoc == NULL) {
12560 	    ctxt->sax = oldsax;
12561 	    ctxt->dict = NULL;
12562 	    xmlFreeParserCtxt(ctxt);
12563 	    return(XML_ERR_INTERNAL_ERROR);
12564 	}
12565 	newDoc->properties = XML_DOC_INTERNAL;
12566 	newDoc->dict = ctxt->dict;
12567 	xmlDictReference(newDoc->dict);
12568 	ctxt->myDoc = newDoc;
12569     } else {
12570 	ctxt->myDoc = oldctxt->myDoc;
12571         content = ctxt->myDoc->children;
12572 	last = ctxt->myDoc->last;
12573     }
12574     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12575     if (newRoot == NULL) {
12576 	ctxt->sax = oldsax;
12577 	ctxt->dict = NULL;
12578 	xmlFreeParserCtxt(ctxt);
12579 	if (newDoc != NULL) {
12580 	    xmlFreeDoc(newDoc);
12581 	}
12582 	return(XML_ERR_INTERNAL_ERROR);
12583     }
12584     ctxt->myDoc->children = NULL;
12585     ctxt->myDoc->last = NULL;
12586     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12587     nodePush(ctxt, ctxt->myDoc->children);
12588     ctxt->instate = XML_PARSER_CONTENT;
12589     ctxt->depth = oldctxt->depth + 1;
12590 
12591     ctxt->validate = 0;
12592     ctxt->loadsubset = oldctxt->loadsubset;
12593     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12594 	/*
12595 	 * ID/IDREF registration will be done in xmlValidateElement below
12596 	 */
12597 	ctxt->loadsubset |= XML_SKIP_IDS;
12598     }
12599     ctxt->dictNames = oldctxt->dictNames;
12600     ctxt->attsDefault = oldctxt->attsDefault;
12601     ctxt->attsSpecial = oldctxt->attsSpecial;
12602 
12603     xmlParseContent(ctxt);
12604     if ((RAW == '<') && (NXT(1) == '/')) {
12605 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12606     } else if (RAW != 0) {
12607 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12608     }
12609     if (ctxt->node != ctxt->myDoc->children) {
12610 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12611     }
12612 
12613     if (!ctxt->wellFormed) {
12614         if (ctxt->errNo == 0)
12615 	    ret = XML_ERR_INTERNAL_ERROR;
12616 	else
12617 	    ret = (xmlParserErrors)ctxt->errNo;
12618     } else {
12619       ret = XML_ERR_OK;
12620     }
12621 
12622     if ((lst != NULL) && (ret == XML_ERR_OK)) {
12623 	xmlNodePtr cur;
12624 
12625 	/*
12626 	 * Return the newly created nodeset after unlinking it from
12627 	 * they pseudo parent.
12628 	 */
12629 	cur = ctxt->myDoc->children->children;
12630 	*lst = cur;
12631 	while (cur != NULL) {
12632 #ifdef LIBXML_VALID_ENABLED
12633 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12634 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12635 		(cur->type == XML_ELEMENT_NODE)) {
12636 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12637 			oldctxt->myDoc, cur);
12638 	    }
12639 #endif /* LIBXML_VALID_ENABLED */
12640 	    cur->parent = NULL;
12641 	    cur = cur->next;
12642 	}
12643 	ctxt->myDoc->children->children = NULL;
12644     }
12645     if (ctxt->myDoc != NULL) {
12646 	xmlFreeNode(ctxt->myDoc->children);
12647         ctxt->myDoc->children = content;
12648         ctxt->myDoc->last = last;
12649     }
12650 
12651     /*
12652      * Record in the parent context the number of entities replacement
12653      * done when parsing that reference.
12654      */
12655     oldctxt->nbentities += ctxt->nbentities;
12656     /*
12657      * Also record the last error if any
12658      */
12659     if (ctxt->lastError.code != XML_ERR_OK)
12660         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12661 
12662     ctxt->sax = oldsax;
12663     ctxt->dict = NULL;
12664     ctxt->attsDefault = NULL;
12665     ctxt->attsSpecial = NULL;
12666     xmlFreeParserCtxt(ctxt);
12667     if (newDoc != NULL) {
12668 	xmlFreeDoc(newDoc);
12669     }
12670 
12671     return(ret);
12672 }
12673 
12674 /**
12675  * xmlParseInNodeContext:
12676  * @node:  the context node
12677  * @data:  the input string
12678  * @datalen:  the input string length in bytes
12679  * @options:  a combination of xmlParserOption
12680  * @lst:  the return value for the set of parsed nodes
12681  *
12682  * Parse a well-balanced chunk of an XML document
12683  * within the context (DTD, namespaces, etc ...) of the given node.
12684  *
12685  * The allowed sequence for the data is a Well Balanced Chunk defined by
12686  * the content production in the XML grammar:
12687  *
12688  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12689  *
12690  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12691  * error code otherwise
12692  */
12693 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12694 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12695                       int options, xmlNodePtr *lst) {
12696 #ifdef SAX2
12697     xmlParserCtxtPtr ctxt;
12698     xmlDocPtr doc = NULL;
12699     xmlNodePtr fake, cur;
12700     int nsnr = 0;
12701 
12702     xmlParserErrors ret = XML_ERR_OK;
12703 
12704     /*
12705      * check all input parameters, grab the document
12706      */
12707     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12708         return(XML_ERR_INTERNAL_ERROR);
12709     switch (node->type) {
12710         case XML_ELEMENT_NODE:
12711         case XML_ATTRIBUTE_NODE:
12712         case XML_TEXT_NODE:
12713         case XML_CDATA_SECTION_NODE:
12714         case XML_ENTITY_REF_NODE:
12715         case XML_PI_NODE:
12716         case XML_COMMENT_NODE:
12717         case XML_DOCUMENT_NODE:
12718         case XML_HTML_DOCUMENT_NODE:
12719 	    break;
12720 	default:
12721 	    return(XML_ERR_INTERNAL_ERROR);
12722 
12723     }
12724     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12725            (node->type != XML_DOCUMENT_NODE) &&
12726 	   (node->type != XML_HTML_DOCUMENT_NODE))
12727 	node = node->parent;
12728     if (node == NULL)
12729 	return(XML_ERR_INTERNAL_ERROR);
12730     if (node->type == XML_ELEMENT_NODE)
12731 	doc = node->doc;
12732     else
12733         doc = (xmlDocPtr) node;
12734     if (doc == NULL)
12735 	return(XML_ERR_INTERNAL_ERROR);
12736 
12737     /*
12738      * allocate a context and set-up everything not related to the
12739      * node position in the tree
12740      */
12741     if (doc->type == XML_DOCUMENT_NODE)
12742 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12743 #ifdef LIBXML_HTML_ENABLED
12744     else if (doc->type == XML_HTML_DOCUMENT_NODE)
12745 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12746 #endif
12747     else
12748         return(XML_ERR_INTERNAL_ERROR);
12749 
12750     if (ctxt == NULL)
12751         return(XML_ERR_NO_MEMORY);
12752     fake = xmlNewComment(NULL);
12753     if (fake == NULL) {
12754         xmlFreeParserCtxt(ctxt);
12755 	return(XML_ERR_NO_MEMORY);
12756     }
12757     xmlAddChild(node, fake);
12758 
12759     /*
12760      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12761      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12762      * we must wait until the last moment to free the original one.
12763      */
12764     if (doc->dict != NULL) {
12765         if (ctxt->dict != NULL)
12766 	    xmlDictFree(ctxt->dict);
12767 	ctxt->dict = doc->dict;
12768     } else
12769         options |= XML_PARSE_NODICT;
12770 
12771     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
12772     xmlDetectSAX2(ctxt);
12773     ctxt->myDoc = doc;
12774 
12775     if (node->type == XML_ELEMENT_NODE) {
12776 	nodePush(ctxt, node);
12777 	/*
12778 	 * initialize the SAX2 namespaces stack
12779 	 */
12780 	cur = node;
12781 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12782 	    xmlNsPtr ns = cur->nsDef;
12783 	    const xmlChar *iprefix, *ihref;
12784 
12785 	    while (ns != NULL) {
12786 		if (ctxt->dict) {
12787 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12788 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12789 		} else {
12790 		    iprefix = ns->prefix;
12791 		    ihref = ns->href;
12792 		}
12793 
12794 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12795 		    nsPush(ctxt, iprefix, ihref);
12796 		    nsnr++;
12797 		}
12798 		ns = ns->next;
12799 	    }
12800 	    cur = cur->parent;
12801 	}
12802 	ctxt->instate = XML_PARSER_CONTENT;
12803     }
12804 
12805     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12806 	/*
12807 	 * ID/IDREF registration will be done in xmlValidateElement below
12808 	 */
12809 	ctxt->loadsubset |= XML_SKIP_IDS;
12810     }
12811 
12812 #ifdef LIBXML_HTML_ENABLED
12813     if (doc->type == XML_HTML_DOCUMENT_NODE)
12814         __htmlParseContent(ctxt);
12815     else
12816 #endif
12817 	xmlParseContent(ctxt);
12818 
12819     nsPop(ctxt, nsnr);
12820     if ((RAW == '<') && (NXT(1) == '/')) {
12821 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12822     } else if (RAW != 0) {
12823 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12824     }
12825     if ((ctxt->node != NULL) && (ctxt->node != node)) {
12826 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12827 	ctxt->wellFormed = 0;
12828     }
12829 
12830     if (!ctxt->wellFormed) {
12831         if (ctxt->errNo == 0)
12832 	    ret = XML_ERR_INTERNAL_ERROR;
12833 	else
12834 	    ret = (xmlParserErrors)ctxt->errNo;
12835     } else {
12836         ret = XML_ERR_OK;
12837     }
12838 
12839     /*
12840      * Return the newly created nodeset after unlinking it from
12841      * the pseudo sibling.
12842      */
12843 
12844     cur = fake->next;
12845     fake->next = NULL;
12846     node->last = fake;
12847 
12848     if (cur != NULL) {
12849 	cur->prev = NULL;
12850     }
12851 
12852     *lst = cur;
12853 
12854     while (cur != NULL) {
12855 	cur->parent = NULL;
12856 	cur = cur->next;
12857     }
12858 
12859     xmlUnlinkNode(fake);
12860     xmlFreeNode(fake);
12861 
12862 
12863     if (ret != XML_ERR_OK) {
12864         xmlFreeNodeList(*lst);
12865 	*lst = NULL;
12866     }
12867 
12868     if (doc->dict != NULL)
12869         ctxt->dict = NULL;
12870     xmlFreeParserCtxt(ctxt);
12871 
12872     return(ret);
12873 #else /* !SAX2 */
12874     return(XML_ERR_INTERNAL_ERROR);
12875 #endif
12876 }
12877 
12878 #ifdef LIBXML_SAX1_ENABLED
12879 /**
12880  * xmlParseBalancedChunkMemoryRecover:
12881  * @doc:  the document the chunk pertains to
12882  * @sax:  the SAX handler bloc (possibly NULL)
12883  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12884  * @depth:  Used for loop detection, use 0
12885  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12886  * @lst:  the return value for the set of parsed nodes
12887  * @recover: return nodes even if the data is broken (use 0)
12888  *
12889  *
12890  * Parse a well-balanced chunk of an XML document
12891  * called by the parser
12892  * The allowed sequence for the Well Balanced Chunk is the one defined by
12893  * the content production in the XML grammar:
12894  *
12895  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12896  *
12897  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12898  *    the parser error code otherwise
12899  *
12900  * In case recover is set to 1, the nodelist will not be empty even if
12901  * the parsed chunk is not well balanced, assuming the parsing succeeded to
12902  * some extent.
12903  */
12904 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)12905 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12906      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12907      int recover) {
12908     xmlParserCtxtPtr ctxt;
12909     xmlDocPtr newDoc;
12910     xmlSAXHandlerPtr oldsax = NULL;
12911     xmlNodePtr content, newRoot;
12912     int size;
12913     int ret = 0;
12914 
12915     if (depth > 40) {
12916 	return(XML_ERR_ENTITY_LOOP);
12917     }
12918 
12919 
12920     if (lst != NULL)
12921         *lst = NULL;
12922     if (string == NULL)
12923         return(-1);
12924 
12925     size = xmlStrlen(string);
12926 
12927     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12928     if (ctxt == NULL) return(-1);
12929     ctxt->userData = ctxt;
12930     if (sax != NULL) {
12931 	oldsax = ctxt->sax;
12932         ctxt->sax = sax;
12933 	if (user_data != NULL)
12934 	    ctxt->userData = user_data;
12935     }
12936     newDoc = xmlNewDoc(BAD_CAST "1.0");
12937     if (newDoc == NULL) {
12938 	xmlFreeParserCtxt(ctxt);
12939 	return(-1);
12940     }
12941     newDoc->properties = XML_DOC_INTERNAL;
12942     if ((doc != NULL) && (doc->dict != NULL)) {
12943         xmlDictFree(ctxt->dict);
12944 	ctxt->dict = doc->dict;
12945 	xmlDictReference(ctxt->dict);
12946 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12947 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12948 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12949 	ctxt->dictNames = 1;
12950     } else {
12951 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
12952     }
12953     if (doc != NULL) {
12954 	newDoc->intSubset = doc->intSubset;
12955 	newDoc->extSubset = doc->extSubset;
12956     }
12957     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12958     if (newRoot == NULL) {
12959 	if (sax != NULL)
12960 	    ctxt->sax = oldsax;
12961 	xmlFreeParserCtxt(ctxt);
12962 	newDoc->intSubset = NULL;
12963 	newDoc->extSubset = NULL;
12964         xmlFreeDoc(newDoc);
12965 	return(-1);
12966     }
12967     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12968     nodePush(ctxt, newRoot);
12969     if (doc == NULL) {
12970 	ctxt->myDoc = newDoc;
12971     } else {
12972 	ctxt->myDoc = newDoc;
12973 	newDoc->children->doc = doc;
12974 	/* Ensure that doc has XML spec namespace */
12975 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12976 	newDoc->oldNs = doc->oldNs;
12977     }
12978     ctxt->instate = XML_PARSER_CONTENT;
12979     ctxt->depth = depth;
12980 
12981     /*
12982      * Doing validity checking on chunk doesn't make sense
12983      */
12984     ctxt->validate = 0;
12985     ctxt->loadsubset = 0;
12986     xmlDetectSAX2(ctxt);
12987 
12988     if ( doc != NULL ){
12989         content = doc->children;
12990         doc->children = NULL;
12991         xmlParseContent(ctxt);
12992         doc->children = content;
12993     }
12994     else {
12995         xmlParseContent(ctxt);
12996     }
12997     if ((RAW == '<') && (NXT(1) == '/')) {
12998 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12999     } else if (RAW != 0) {
13000 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13001     }
13002     if (ctxt->node != newDoc->children) {
13003 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13004     }
13005 
13006     if (!ctxt->wellFormed) {
13007         if (ctxt->errNo == 0)
13008 	    ret = 1;
13009 	else
13010 	    ret = ctxt->errNo;
13011     } else {
13012       ret = 0;
13013     }
13014 
13015     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13016 	xmlNodePtr cur;
13017 
13018 	/*
13019 	 * Return the newly created nodeset after unlinking it from
13020 	 * they pseudo parent.
13021 	 */
13022 	cur = newDoc->children->children;
13023 	*lst = cur;
13024 	while (cur != NULL) {
13025 	    xmlSetTreeDoc(cur, doc);
13026 	    cur->parent = NULL;
13027 	    cur = cur->next;
13028 	}
13029 	newDoc->children->children = NULL;
13030     }
13031 
13032     if (sax != NULL)
13033 	ctxt->sax = oldsax;
13034     xmlFreeParserCtxt(ctxt);
13035     newDoc->intSubset = NULL;
13036     newDoc->extSubset = NULL;
13037     newDoc->oldNs = NULL;
13038     xmlFreeDoc(newDoc);
13039 
13040     return(ret);
13041 }
13042 
13043 /**
13044  * xmlSAXParseEntity:
13045  * @sax:  the SAX handler block
13046  * @filename:  the filename
13047  *
13048  * parse an XML external entity out of context and build a tree.
13049  * It use the given SAX function block to handle the parsing callback.
13050  * If sax is NULL, fallback to the default DOM tree building routines.
13051  *
13052  * [78] extParsedEnt ::= TextDecl? content
13053  *
13054  * This correspond to a "Well Balanced" chunk
13055  *
13056  * Returns the resulting document tree
13057  */
13058 
13059 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13060 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13061     xmlDocPtr ret;
13062     xmlParserCtxtPtr ctxt;
13063 
13064     ctxt = xmlCreateFileParserCtxt(filename);
13065     if (ctxt == NULL) {
13066 	return(NULL);
13067     }
13068     if (sax != NULL) {
13069 	if (ctxt->sax != NULL)
13070 	    xmlFree(ctxt->sax);
13071         ctxt->sax = sax;
13072         ctxt->userData = NULL;
13073     }
13074 
13075     xmlParseExtParsedEnt(ctxt);
13076 
13077     if (ctxt->wellFormed)
13078 	ret = ctxt->myDoc;
13079     else {
13080         ret = NULL;
13081         xmlFreeDoc(ctxt->myDoc);
13082         ctxt->myDoc = NULL;
13083     }
13084     if (sax != NULL)
13085         ctxt->sax = NULL;
13086     xmlFreeParserCtxt(ctxt);
13087 
13088     return(ret);
13089 }
13090 
13091 /**
13092  * xmlParseEntity:
13093  * @filename:  the filename
13094  *
13095  * parse an XML external entity out of context and build a tree.
13096  *
13097  * [78] extParsedEnt ::= TextDecl? content
13098  *
13099  * This correspond to a "Well Balanced" chunk
13100  *
13101  * Returns the resulting document tree
13102  */
13103 
13104 xmlDocPtr
xmlParseEntity(const char * filename)13105 xmlParseEntity(const char *filename) {
13106     return(xmlSAXParseEntity(NULL, filename));
13107 }
13108 #endif /* LIBXML_SAX1_ENABLED */
13109 
13110 /**
13111  * xmlCreateEntityParserCtxt:
13112  * @URL:  the entity URL
13113  * @ID:  the entity PUBLIC ID
13114  * @base:  a possible base for the target URI
13115  *
13116  * Create a parser context for an external entity
13117  * Automatic support for ZLIB/Compress compressed document is provided
13118  * by default if found at compile-time.
13119  *
13120  * Returns the new parser context or NULL
13121  */
13122 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13123 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13124 	                  const xmlChar *base) {
13125     xmlParserCtxtPtr ctxt;
13126     xmlParserInputPtr inputStream;
13127     char *directory = NULL;
13128     xmlChar *uri;
13129 
13130     ctxt = xmlNewParserCtxt();
13131     if (ctxt == NULL) {
13132 	return(NULL);
13133     }
13134 
13135     uri = xmlBuildURI(URL, base);
13136 
13137     if (uri == NULL) {
13138 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13139 	if (inputStream == NULL) {
13140 	    xmlFreeParserCtxt(ctxt);
13141 	    return(NULL);
13142 	}
13143 
13144 	inputPush(ctxt, inputStream);
13145 
13146 	if ((ctxt->directory == NULL) && (directory == NULL))
13147 	    directory = xmlParserGetDirectory((char *)URL);
13148 	if ((ctxt->directory == NULL) && (directory != NULL))
13149 	    ctxt->directory = directory;
13150     } else {
13151 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13152 	if (inputStream == NULL) {
13153 	    xmlFree(uri);
13154 	    xmlFreeParserCtxt(ctxt);
13155 	    return(NULL);
13156 	}
13157 
13158 	inputPush(ctxt, inputStream);
13159 
13160 	if ((ctxt->directory == NULL) && (directory == NULL))
13161 	    directory = xmlParserGetDirectory((char *)uri);
13162 	if ((ctxt->directory == NULL) && (directory != NULL))
13163 	    ctxt->directory = directory;
13164 	xmlFree(uri);
13165     }
13166     return(ctxt);
13167 }
13168 
13169 /************************************************************************
13170  *									*
13171  *		Front ends when parsing from a file			*
13172  *									*
13173  ************************************************************************/
13174 
13175 /**
13176  * xmlCreateURLParserCtxt:
13177  * @filename:  the filename or URL
13178  * @options:  a combination of xmlParserOption
13179  *
13180  * Create a parser context for a file or URL content.
13181  * Automatic support for ZLIB/Compress compressed document is provided
13182  * by default if found at compile-time and for file accesses
13183  *
13184  * Returns the new parser context or NULL
13185  */
13186 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13187 xmlCreateURLParserCtxt(const char *filename, int options)
13188 {
13189     xmlParserCtxtPtr ctxt;
13190     xmlParserInputPtr inputStream;
13191     char *directory = NULL;
13192 
13193     ctxt = xmlNewParserCtxt();
13194     if (ctxt == NULL) {
13195 	xmlErrMemory(NULL, "cannot allocate parser context");
13196 	return(NULL);
13197     }
13198 
13199     if (options)
13200 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13201     ctxt->linenumbers = 1;
13202 
13203     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13204     if (inputStream == NULL) {
13205 	xmlFreeParserCtxt(ctxt);
13206 	return(NULL);
13207     }
13208 
13209     inputPush(ctxt, inputStream);
13210     if ((ctxt->directory == NULL) && (directory == NULL))
13211         directory = xmlParserGetDirectory(filename);
13212     if ((ctxt->directory == NULL) && (directory != NULL))
13213         ctxt->directory = directory;
13214 
13215     return(ctxt);
13216 }
13217 
13218 /**
13219  * xmlCreateFileParserCtxt:
13220  * @filename:  the filename
13221  *
13222  * Create a parser context for a file content.
13223  * Automatic support for ZLIB/Compress compressed document is provided
13224  * by default if found at compile-time.
13225  *
13226  * Returns the new parser context or NULL
13227  */
13228 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13229 xmlCreateFileParserCtxt(const char *filename)
13230 {
13231     return(xmlCreateURLParserCtxt(filename, 0));
13232 }
13233 
13234 #ifdef LIBXML_SAX1_ENABLED
13235 /**
13236  * xmlSAXParseFileWithData:
13237  * @sax:  the SAX handler block
13238  * @filename:  the filename
13239  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13240  *             documents
13241  * @data:  the userdata
13242  *
13243  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13244  * compressed document is provided by default if found at compile-time.
13245  * It use the given SAX function block to handle the parsing callback.
13246  * If sax is NULL, fallback to the default DOM tree building routines.
13247  *
13248  * User data (void *) is stored within the parser context in the
13249  * context's _private member, so it is available nearly everywhere in libxml
13250  *
13251  * Returns the resulting document tree
13252  */
13253 
13254 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13255 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13256                         int recovery, void *data) {
13257     xmlDocPtr ret;
13258     xmlParserCtxtPtr ctxt;
13259 
13260     xmlInitParser();
13261 
13262     ctxt = xmlCreateFileParserCtxt(filename);
13263     if (ctxt == NULL) {
13264 	return(NULL);
13265     }
13266     if (sax != NULL) {
13267 	if (ctxt->sax != NULL)
13268 	    xmlFree(ctxt->sax);
13269         ctxt->sax = sax;
13270     }
13271     xmlDetectSAX2(ctxt);
13272     if (data!=NULL) {
13273 	ctxt->_private = data;
13274     }
13275 
13276     if (ctxt->directory == NULL)
13277         ctxt->directory = xmlParserGetDirectory(filename);
13278 
13279     ctxt->recovery = recovery;
13280 
13281     xmlParseDocument(ctxt);
13282 
13283     if ((ctxt->wellFormed) || recovery) {
13284         ret = ctxt->myDoc;
13285 	if (ret != NULL) {
13286 	    if (ctxt->input->buf->compressed > 0)
13287 		ret->compression = 9;
13288 	    else
13289 		ret->compression = ctxt->input->buf->compressed;
13290 	}
13291     }
13292     else {
13293        ret = NULL;
13294        xmlFreeDoc(ctxt->myDoc);
13295        ctxt->myDoc = NULL;
13296     }
13297     if (sax != NULL)
13298         ctxt->sax = NULL;
13299     xmlFreeParserCtxt(ctxt);
13300 
13301     return(ret);
13302 }
13303 
13304 /**
13305  * xmlSAXParseFile:
13306  * @sax:  the SAX handler block
13307  * @filename:  the filename
13308  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13309  *             documents
13310  *
13311  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13312  * compressed document is provided by default if found at compile-time.
13313  * It use the given SAX function block to handle the parsing callback.
13314  * If sax is NULL, fallback to the default DOM tree building routines.
13315  *
13316  * Returns the resulting document tree
13317  */
13318 
13319 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13320 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13321                           int recovery) {
13322     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13323 }
13324 
13325 /**
13326  * xmlRecoverDoc:
13327  * @cur:  a pointer to an array of xmlChar
13328  *
13329  * parse an XML in-memory document and build a tree.
13330  * In the case the document is not Well Formed, a attempt to build a
13331  * tree is tried anyway
13332  *
13333  * Returns the resulting document tree or NULL in case of failure
13334  */
13335 
13336 xmlDocPtr
xmlRecoverDoc(xmlChar * cur)13337 xmlRecoverDoc(xmlChar *cur) {
13338     return(xmlSAXParseDoc(NULL, cur, 1));
13339 }
13340 
13341 /**
13342  * xmlParseFile:
13343  * @filename:  the filename
13344  *
13345  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13346  * compressed document is provided by default if found at compile-time.
13347  *
13348  * Returns the resulting document tree if the file was wellformed,
13349  * NULL otherwise.
13350  */
13351 
13352 xmlDocPtr
xmlParseFile(const char * filename)13353 xmlParseFile(const char *filename) {
13354     return(xmlSAXParseFile(NULL, filename, 0));
13355 }
13356 
13357 /**
13358  * xmlRecoverFile:
13359  * @filename:  the filename
13360  *
13361  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13362  * compressed document is provided by default if found at compile-time.
13363  * In the case the document is not Well Formed, it attempts to build
13364  * a tree anyway
13365  *
13366  * Returns the resulting document tree or NULL in case of failure
13367  */
13368 
13369 xmlDocPtr
xmlRecoverFile(const char * filename)13370 xmlRecoverFile(const char *filename) {
13371     return(xmlSAXParseFile(NULL, filename, 1));
13372 }
13373 
13374 
13375 /**
13376  * xmlSetupParserForBuffer:
13377  * @ctxt:  an XML parser context
13378  * @buffer:  a xmlChar * buffer
13379  * @filename:  a file name
13380  *
13381  * Setup the parser context to parse a new buffer; Clears any prior
13382  * contents from the parser context. The buffer parameter must not be
13383  * NULL, but the filename parameter can be
13384  */
13385 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13386 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13387                              const char* filename)
13388 {
13389     xmlParserInputPtr input;
13390 
13391     if ((ctxt == NULL) || (buffer == NULL))
13392         return;
13393 
13394     input = xmlNewInputStream(ctxt);
13395     if (input == NULL) {
13396         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13397         xmlClearParserCtxt(ctxt);
13398         return;
13399     }
13400 
13401     xmlClearParserCtxt(ctxt);
13402     if (filename != NULL)
13403         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13404     input->base = buffer;
13405     input->cur = buffer;
13406     input->end = &buffer[xmlStrlen(buffer)];
13407     inputPush(ctxt, input);
13408 }
13409 
13410 /**
13411  * xmlSAXUserParseFile:
13412  * @sax:  a SAX handler
13413  * @user_data:  The user data returned on SAX callbacks
13414  * @filename:  a file name
13415  *
13416  * parse an XML file and call the given SAX handler routines.
13417  * Automatic support for ZLIB/Compress compressed document is provided
13418  *
13419  * Returns 0 in case of success or a error number otherwise
13420  */
13421 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13422 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13423                     const char *filename) {
13424     int ret = 0;
13425     xmlParserCtxtPtr ctxt;
13426 
13427     ctxt = xmlCreateFileParserCtxt(filename);
13428     if (ctxt == NULL) return -1;
13429     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13430 	xmlFree(ctxt->sax);
13431     ctxt->sax = sax;
13432     xmlDetectSAX2(ctxt);
13433 
13434     if (user_data != NULL)
13435 	ctxt->userData = user_data;
13436 
13437     xmlParseDocument(ctxt);
13438 
13439     if (ctxt->wellFormed)
13440 	ret = 0;
13441     else {
13442         if (ctxt->errNo != 0)
13443 	    ret = ctxt->errNo;
13444 	else
13445 	    ret = -1;
13446     }
13447     if (sax != NULL)
13448 	ctxt->sax = NULL;
13449     if (ctxt->myDoc != NULL) {
13450         xmlFreeDoc(ctxt->myDoc);
13451 	ctxt->myDoc = NULL;
13452     }
13453     xmlFreeParserCtxt(ctxt);
13454 
13455     return ret;
13456 }
13457 #endif /* LIBXML_SAX1_ENABLED */
13458 
13459 /************************************************************************
13460  *									*
13461  * 		Front ends when parsing from memory			*
13462  *									*
13463  ************************************************************************/
13464 
13465 /**
13466  * xmlCreateMemoryParserCtxt:
13467  * @buffer:  a pointer to a char array
13468  * @size:  the size of the array
13469  *
13470  * Create a parser context for an XML in-memory document.
13471  *
13472  * Returns the new parser context or NULL
13473  */
13474 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13475 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13476     xmlParserCtxtPtr ctxt;
13477     xmlParserInputPtr input;
13478     xmlParserInputBufferPtr buf;
13479 
13480     if (buffer == NULL)
13481 	return(NULL);
13482     if (size <= 0)
13483 	return(NULL);
13484 
13485     ctxt = xmlNewParserCtxt();
13486     if (ctxt == NULL)
13487 	return(NULL);
13488 
13489     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13490     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13491     if (buf == NULL) {
13492 	xmlFreeParserCtxt(ctxt);
13493 	return(NULL);
13494     }
13495 
13496     input = xmlNewInputStream(ctxt);
13497     if (input == NULL) {
13498 	xmlFreeParserInputBuffer(buf);
13499 	xmlFreeParserCtxt(ctxt);
13500 	return(NULL);
13501     }
13502 
13503     input->filename = NULL;
13504     input->buf = buf;
13505     input->base = input->buf->buffer->content;
13506     input->cur = input->buf->buffer->content;
13507     input->end = &input->buf->buffer->content[input->buf->buffer->use];
13508 
13509     inputPush(ctxt, input);
13510     return(ctxt);
13511 }
13512 
13513 #ifdef LIBXML_SAX1_ENABLED
13514 /**
13515  * xmlSAXParseMemoryWithData:
13516  * @sax:  the SAX handler block
13517  * @buffer:  an pointer to a char array
13518  * @size:  the size of the array
13519  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13520  *             documents
13521  * @data:  the userdata
13522  *
13523  * parse an XML in-memory block and use the given SAX function block
13524  * to handle the parsing callback. If sax is NULL, fallback to the default
13525  * DOM tree building routines.
13526  *
13527  * User data (void *) is stored within the parser context in the
13528  * context's _private member, so it is available nearly everywhere in libxml
13529  *
13530  * Returns the resulting document tree
13531  */
13532 
13533 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13534 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13535 	          int size, int recovery, void *data) {
13536     xmlDocPtr ret;
13537     xmlParserCtxtPtr ctxt;
13538 
13539     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13540     if (ctxt == NULL) return(NULL);
13541     if (sax != NULL) {
13542 	if (ctxt->sax != NULL)
13543 	    xmlFree(ctxt->sax);
13544         ctxt->sax = sax;
13545     }
13546     xmlDetectSAX2(ctxt);
13547     if (data!=NULL) {
13548 	ctxt->_private=data;
13549     }
13550 
13551     ctxt->recovery = recovery;
13552 
13553     xmlParseDocument(ctxt);
13554 
13555     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13556     else {
13557        ret = NULL;
13558        xmlFreeDoc(ctxt->myDoc);
13559        ctxt->myDoc = NULL;
13560     }
13561     if (sax != NULL)
13562 	ctxt->sax = NULL;
13563     xmlFreeParserCtxt(ctxt);
13564 
13565     return(ret);
13566 }
13567 
13568 /**
13569  * xmlSAXParseMemory:
13570  * @sax:  the SAX handler block
13571  * @buffer:  an pointer to a char array
13572  * @size:  the size of the array
13573  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13574  *             documents
13575  *
13576  * parse an XML in-memory block and use the given SAX function block
13577  * to handle the parsing callback. If sax is NULL, fallback to the default
13578  * DOM tree building routines.
13579  *
13580  * Returns the resulting document tree
13581  */
13582 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13583 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13584 	          int size, int recovery) {
13585     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13586 }
13587 
13588 /**
13589  * xmlParseMemory:
13590  * @buffer:  an pointer to a char array
13591  * @size:  the size of the array
13592  *
13593  * parse an XML in-memory block and build a tree.
13594  *
13595  * Returns the resulting document tree
13596  */
13597 
xmlParseMemory(const char * buffer,int size)13598 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13599    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13600 }
13601 
13602 /**
13603  * xmlRecoverMemory:
13604  * @buffer:  an pointer to a char array
13605  * @size:  the size of the array
13606  *
13607  * parse an XML in-memory block and build a tree.
13608  * In the case the document is not Well Formed, an attempt to
13609  * build a tree is tried anyway
13610  *
13611  * Returns the resulting document tree or NULL in case of error
13612  */
13613 
xmlRecoverMemory(const char * buffer,int size)13614 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13615    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13616 }
13617 
13618 /**
13619  * xmlSAXUserParseMemory:
13620  * @sax:  a SAX handler
13621  * @user_data:  The user data returned on SAX callbacks
13622  * @buffer:  an in-memory XML document input
13623  * @size:  the length of the XML document in bytes
13624  *
13625  * A better SAX parsing routine.
13626  * parse an XML in-memory buffer and call the given SAX handler routines.
13627  *
13628  * Returns 0 in case of success or a error number otherwise
13629  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13630 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13631 			  const char *buffer, int size) {
13632     int ret = 0;
13633     xmlParserCtxtPtr ctxt;
13634 
13635     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13636     if (ctxt == NULL) return -1;
13637     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13638         xmlFree(ctxt->sax);
13639     ctxt->sax = sax;
13640     xmlDetectSAX2(ctxt);
13641 
13642     if (user_data != NULL)
13643 	ctxt->userData = user_data;
13644 
13645     xmlParseDocument(ctxt);
13646 
13647     if (ctxt->wellFormed)
13648 	ret = 0;
13649     else {
13650         if (ctxt->errNo != 0)
13651 	    ret = ctxt->errNo;
13652 	else
13653 	    ret = -1;
13654     }
13655     if (sax != NULL)
13656         ctxt->sax = NULL;
13657     if (ctxt->myDoc != NULL) {
13658         xmlFreeDoc(ctxt->myDoc);
13659 	ctxt->myDoc = NULL;
13660     }
13661     xmlFreeParserCtxt(ctxt);
13662 
13663     return ret;
13664 }
13665 #endif /* LIBXML_SAX1_ENABLED */
13666 
13667 /**
13668  * xmlCreateDocParserCtxt:
13669  * @cur:  a pointer to an array of xmlChar
13670  *
13671  * Creates a parser context for an XML in-memory document.
13672  *
13673  * Returns the new parser context or NULL
13674  */
13675 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)13676 xmlCreateDocParserCtxt(const xmlChar *cur) {
13677     int len;
13678 
13679     if (cur == NULL)
13680 	return(NULL);
13681     len = xmlStrlen(cur);
13682     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13683 }
13684 
13685 #ifdef LIBXML_SAX1_ENABLED
13686 /**
13687  * xmlSAXParseDoc:
13688  * @sax:  the SAX handler block
13689  * @cur:  a pointer to an array of xmlChar
13690  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13691  *             documents
13692  *
13693  * parse an XML in-memory document and build a tree.
13694  * It use the given SAX function block to handle the parsing callback.
13695  * If sax is NULL, fallback to the default DOM tree building routines.
13696  *
13697  * Returns the resulting document tree
13698  */
13699 
13700 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13701 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13702     xmlDocPtr ret;
13703     xmlParserCtxtPtr ctxt;
13704     xmlSAXHandlerPtr oldsax = NULL;
13705 
13706     if (cur == NULL) return(NULL);
13707 
13708 
13709     ctxt = xmlCreateDocParserCtxt(cur);
13710     if (ctxt == NULL) return(NULL);
13711     if (sax != NULL) {
13712         oldsax = ctxt->sax;
13713         ctxt->sax = sax;
13714         ctxt->userData = NULL;
13715     }
13716     xmlDetectSAX2(ctxt);
13717 
13718     xmlParseDocument(ctxt);
13719     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13720     else {
13721        ret = NULL;
13722        xmlFreeDoc(ctxt->myDoc);
13723        ctxt->myDoc = NULL;
13724     }
13725     if (sax != NULL)
13726 	ctxt->sax = oldsax;
13727     xmlFreeParserCtxt(ctxt);
13728 
13729     return(ret);
13730 }
13731 
13732 /**
13733  * xmlParseDoc:
13734  * @cur:  a pointer to an array of xmlChar
13735  *
13736  * parse an XML in-memory document and build a tree.
13737  *
13738  * Returns the resulting document tree
13739  */
13740 
13741 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13742 xmlParseDoc(const xmlChar *cur) {
13743     return(xmlSAXParseDoc(NULL, cur, 0));
13744 }
13745 #endif /* LIBXML_SAX1_ENABLED */
13746 
13747 #ifdef LIBXML_LEGACY_ENABLED
13748 /************************************************************************
13749  *									*
13750  * 	Specific function to keep track of entities references		*
13751  * 	and used by the XSLT debugger					*
13752  *									*
13753  ************************************************************************/
13754 
13755 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13756 
13757 /**
13758  * xmlAddEntityReference:
13759  * @ent : A valid entity
13760  * @firstNode : A valid first node for children of entity
13761  * @lastNode : A valid last node of children entity
13762  *
13763  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13764  */
13765 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)13766 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13767                       xmlNodePtr lastNode)
13768 {
13769     if (xmlEntityRefFunc != NULL) {
13770         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13771     }
13772 }
13773 
13774 
13775 /**
13776  * xmlSetEntityReferenceFunc:
13777  * @func: A valid function
13778  *
13779  * Set the function to call call back when a xml reference has been made
13780  */
13781 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)13782 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13783 {
13784     xmlEntityRefFunc = func;
13785 }
13786 #endif /* LIBXML_LEGACY_ENABLED */
13787 
13788 /************************************************************************
13789  *									*
13790  * 				Miscellaneous				*
13791  *									*
13792  ************************************************************************/
13793 
13794 #ifdef LIBXML_XPATH_ENABLED
13795 #include <libxml/xpath.h>
13796 #endif
13797 
13798 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13799 static int xmlParserInitialized = 0;
13800 
13801 /**
13802  * xmlInitParser:
13803  *
13804  * Initialization function for the XML parser.
13805  * This is not reentrant. Call once before processing in case of
13806  * use in multithreaded programs.
13807  */
13808 
13809 void
xmlInitParser(void)13810 xmlInitParser(void) {
13811     if (xmlParserInitialized != 0)
13812 	return;
13813 
13814 #ifdef LIBXML_THREAD_ENABLED
13815     __xmlGlobalInitMutexLock();
13816     if (xmlParserInitialized == 0) {
13817 #endif
13818 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13819 	    (xmlGenericError == NULL))
13820 	    initGenericErrorDefaultFunc(NULL);
13821 	xmlInitGlobals();
13822 	xmlInitThreads();
13823 	xmlInitMemory();
13824 	xmlInitCharEncodingHandlers();
13825 	xmlDefaultSAXHandlerInit();
13826 	xmlRegisterDefaultInputCallbacks();
13827 #ifdef LIBXML_OUTPUT_ENABLED
13828 	xmlRegisterDefaultOutputCallbacks();
13829 #endif /* LIBXML_OUTPUT_ENABLED */
13830 #ifdef LIBXML_HTML_ENABLED
13831 	htmlInitAutoClose();
13832 	htmlDefaultSAXHandlerInit();
13833 #endif
13834 #ifdef LIBXML_XPATH_ENABLED
13835 	xmlXPathInit();
13836 #endif
13837 	xmlParserInitialized = 1;
13838 #ifdef LIBXML_THREAD_ENABLED
13839     }
13840     __xmlGlobalInitMutexUnlock();
13841 #endif
13842 }
13843 
13844 /**
13845  * xmlCleanupParser:
13846  *
13847  * This function name is somewhat misleading. It does not clean up
13848  * parser state, it cleans up memory allocated by the library itself.
13849  * It is a cleanup function for the XML library. It tries to reclaim all
13850  * related global memory allocated for the library processing.
13851  * It doesn't deallocate any document related memory. One should
13852  * call xmlCleanupParser() only when the process has finished using
13853  * the library and all XML/HTML documents built with it.
13854  * See also xmlInitParser() which has the opposite function of preparing
13855  * the library for operations.
13856  */
13857 
13858 void
xmlCleanupParser(void)13859 xmlCleanupParser(void) {
13860     if (!xmlParserInitialized)
13861 	return;
13862 
13863     xmlCleanupCharEncodingHandlers();
13864 #ifdef LIBXML_CATALOG_ENABLED
13865     xmlCatalogCleanup();
13866 #endif
13867     xmlDictCleanup();
13868     xmlCleanupInputCallbacks();
13869 #ifdef LIBXML_OUTPUT_ENABLED
13870     xmlCleanupOutputCallbacks();
13871 #endif
13872 #ifdef LIBXML_SCHEMAS_ENABLED
13873     xmlSchemaCleanupTypes();
13874     xmlRelaxNGCleanupTypes();
13875 #endif
13876     xmlCleanupGlobals();
13877     xmlResetLastError();
13878     xmlCleanupThreads(); /* must be last if called not from the main thread */
13879     xmlCleanupMemory();
13880     xmlParserInitialized = 0;
13881 }
13882 
13883 /************************************************************************
13884  *									*
13885  *	New set (2.6.0) of simpler and more flexible APIs		*
13886  *									*
13887  ************************************************************************/
13888 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The expansion refers to a variable named "dict", which
 * must be visible where the macro is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is a
 * single statement and stays safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
		(xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
		xmlFree((char *)(str));				\
	} while (0)
13900 
13901 /**
13902  * xmlCtxtReset:
13903  * @ctxt: an XML parser context
13904  *
13905  * Reset a parser context
13906  */
13907 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13908 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13909 {
13910     xmlParserInputPtr input;
13911     xmlDictPtr dict;
13912 
13913     if (ctxt == NULL)
13914         return;
13915 
13916     dict = ctxt->dict;
13917 
13918     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13919         xmlFreeInputStream(input);
13920     }
13921     ctxt->inputNr = 0;
13922     ctxt->input = NULL;
13923 
13924     ctxt->spaceNr = 0;
13925     if (ctxt->spaceTab != NULL) {
13926 	ctxt->spaceTab[0] = -1;
13927 	ctxt->space = &ctxt->spaceTab[0];
13928     } else {
13929         ctxt->space = NULL;
13930     }
13931 
13932 
13933     ctxt->nodeNr = 0;
13934     ctxt->node = NULL;
13935 
13936     ctxt->nameNr = 0;
13937     ctxt->name = NULL;
13938 
13939     DICT_FREE(ctxt->version);
13940     ctxt->version = NULL;
13941     DICT_FREE(ctxt->encoding);
13942     ctxt->encoding = NULL;
13943     DICT_FREE(ctxt->directory);
13944     ctxt->directory = NULL;
13945     DICT_FREE(ctxt->extSubURI);
13946     ctxt->extSubURI = NULL;
13947     DICT_FREE(ctxt->extSubSystem);
13948     ctxt->extSubSystem = NULL;
13949     if (ctxt->myDoc != NULL)
13950         xmlFreeDoc(ctxt->myDoc);
13951     ctxt->myDoc = NULL;
13952 
13953     ctxt->standalone = -1;
13954     ctxt->hasExternalSubset = 0;
13955     ctxt->hasPErefs = 0;
13956     ctxt->html = 0;
13957     ctxt->external = 0;
13958     ctxt->instate = XML_PARSER_START;
13959     ctxt->token = 0;
13960 
13961     ctxt->wellFormed = 1;
13962     ctxt->nsWellFormed = 1;
13963     ctxt->disableSAX = 0;
13964     ctxt->valid = 1;
13965 #if 0
13966     ctxt->vctxt.userData = ctxt;
13967     ctxt->vctxt.error = xmlParserValidityError;
13968     ctxt->vctxt.warning = xmlParserValidityWarning;
13969 #endif
13970     ctxt->record_info = 0;
13971     ctxt->nbChars = 0;
13972     ctxt->checkIndex = 0;
13973     ctxt->inSubset = 0;
13974     ctxt->errNo = XML_ERR_OK;
13975     ctxt->depth = 0;
13976     ctxt->charset = XML_CHAR_ENCODING_UTF8;
13977     ctxt->catalogs = NULL;
13978     ctxt->nbentities = 0;
13979     ctxt->sizeentities = 0;
13980     xmlInitNodeInfoSeq(&ctxt->node_seq);
13981 
13982     if (ctxt->attsDefault != NULL) {
13983         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13984         ctxt->attsDefault = NULL;
13985     }
13986     if (ctxt->attsSpecial != NULL) {
13987         xmlHashFree(ctxt->attsSpecial, NULL);
13988         ctxt->attsSpecial = NULL;
13989     }
13990 
13991 #ifdef LIBXML_CATALOG_ENABLED
13992     if (ctxt->catalogs != NULL)
13993 	xmlCatalogFreeLocal(ctxt->catalogs);
13994 #endif
13995     if (ctxt->lastError.code != XML_ERR_OK)
13996         xmlResetError(&ctxt->lastError);
13997 }
13998 
13999 /**
14000  * xmlCtxtResetPush:
14001  * @ctxt: an XML parser context
14002  * @chunk:  a pointer to an array of chars
14003  * @size:  number of chars in the array
14004  * @filename:  an optional file name or URI
14005  * @encoding:  the document encoding, or NULL
14006  *
14007  * Reset a push parser context
14008  *
14009  * Returns 0 in case of success and 1 in case of error
14010  */
14011 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14012 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14013                  int size, const char *filename, const char *encoding)
14014 {
14015     xmlParserInputPtr inputStream;
14016     xmlParserInputBufferPtr buf;
14017     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14018 
14019     if (ctxt == NULL)
14020         return(1);
14021 
14022     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14023         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14024 
14025     buf = xmlAllocParserInputBuffer(enc);
14026     if (buf == NULL)
14027         return(1);
14028 
14029     if (ctxt == NULL) {
14030         xmlFreeParserInputBuffer(buf);
14031         return(1);
14032     }
14033 
14034     xmlCtxtReset(ctxt);
14035 
14036     if (ctxt->pushTab == NULL) {
14037         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14038 	                                    sizeof(xmlChar *));
14039         if (ctxt->pushTab == NULL) {
14040 	    xmlErrMemory(ctxt, NULL);
14041             xmlFreeParserInputBuffer(buf);
14042             return(1);
14043         }
14044     }
14045 
14046     if (filename == NULL) {
14047         ctxt->directory = NULL;
14048     } else {
14049         ctxt->directory = xmlParserGetDirectory(filename);
14050     }
14051 
14052     inputStream = xmlNewInputStream(ctxt);
14053     if (inputStream == NULL) {
14054         xmlFreeParserInputBuffer(buf);
14055         return(1);
14056     }
14057 
14058     if (filename == NULL)
14059         inputStream->filename = NULL;
14060     else
14061         inputStream->filename = (char *)
14062             xmlCanonicPath((const xmlChar *) filename);
14063     inputStream->buf = buf;
14064     inputStream->base = inputStream->buf->buffer->content;
14065     inputStream->cur = inputStream->buf->buffer->content;
14066     inputStream->end =
14067         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14068 
14069     inputPush(ctxt, inputStream);
14070 
14071     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14072         (ctxt->input->buf != NULL)) {
14073         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14074         int cur = ctxt->input->cur - ctxt->input->base;
14075 
14076         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14077 
14078         ctxt->input->base = ctxt->input->buf->buffer->content + base;
14079         ctxt->input->cur = ctxt->input->base + cur;
14080         ctxt->input->end =
14081             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14082                                                use];
14083 #ifdef DEBUG_PUSH
14084         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14085 #endif
14086     }
14087 
14088     if (encoding != NULL) {
14089         xmlCharEncodingHandlerPtr hdlr;
14090 
14091         if (ctxt->encoding != NULL)
14092 	    xmlFree((xmlChar *) ctxt->encoding);
14093         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14094 
14095         hdlr = xmlFindCharEncodingHandler(encoding);
14096         if (hdlr != NULL) {
14097             xmlSwitchToEncoding(ctxt, hdlr);
14098 	} else {
14099 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14100 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14101         }
14102     } else if (enc != XML_CHAR_ENCODING_NONE) {
14103         xmlSwitchEncoding(ctxt, enc);
14104     }
14105 
14106     return(0);
14107 }
14108 
14109 
14110 /**
14111  * xmlCtxtUseOptionsInternal:
14112  * @ctxt: an XML parser context
14113  * @options:  a combination of xmlParserOption
14114  * @encoding:  the user provided encoding to use
14115  *
14116  * Applies the options to the parser context
14117  *
14118  * Returns 0 in case of success, the set of unknown or unimplemented options
14119  *         in case of error.
14120  */
14121 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14122 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14123 {
14124     if (ctxt == NULL)
14125         return(-1);
14126     if (encoding != NULL) {
14127         if (ctxt->encoding != NULL)
14128 	    xmlFree((xmlChar *) ctxt->encoding);
14129         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14130     }
14131     if (options & XML_PARSE_RECOVER) {
14132         ctxt->recovery = 1;
14133         options -= XML_PARSE_RECOVER;
14134 	ctxt->options |= XML_PARSE_RECOVER;
14135     } else
14136         ctxt->recovery = 0;
14137     if (options & XML_PARSE_DTDLOAD) {
14138         ctxt->loadsubset = XML_DETECT_IDS;
14139         options -= XML_PARSE_DTDLOAD;
14140 	ctxt->options |= XML_PARSE_DTDLOAD;
14141     } else
14142         ctxt->loadsubset = 0;
14143     if (options & XML_PARSE_DTDATTR) {
14144         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14145         options -= XML_PARSE_DTDATTR;
14146 	ctxt->options |= XML_PARSE_DTDATTR;
14147     }
14148     if (options & XML_PARSE_NOENT) {
14149         ctxt->replaceEntities = 1;
14150         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14151         options -= XML_PARSE_NOENT;
14152 	ctxt->options |= XML_PARSE_NOENT;
14153     } else
14154         ctxt->replaceEntities = 0;
14155     if (options & XML_PARSE_PEDANTIC) {
14156         ctxt->pedantic = 1;
14157         options -= XML_PARSE_PEDANTIC;
14158 	ctxt->options |= XML_PARSE_PEDANTIC;
14159     } else
14160         ctxt->pedantic = 0;
14161     if (options & XML_PARSE_NOBLANKS) {
14162         ctxt->keepBlanks = 0;
14163         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14164         options -= XML_PARSE_NOBLANKS;
14165 	ctxt->options |= XML_PARSE_NOBLANKS;
14166     } else
14167         ctxt->keepBlanks = 1;
14168     if (options & XML_PARSE_DTDVALID) {
14169         ctxt->validate = 1;
14170         if (options & XML_PARSE_NOWARNING)
14171             ctxt->vctxt.warning = NULL;
14172         if (options & XML_PARSE_NOERROR)
14173             ctxt->vctxt.error = NULL;
14174         options -= XML_PARSE_DTDVALID;
14175 	ctxt->options |= XML_PARSE_DTDVALID;
14176     } else
14177         ctxt->validate = 0;
14178     if (options & XML_PARSE_NOWARNING) {
14179         ctxt->sax->warning = NULL;
14180         options -= XML_PARSE_NOWARNING;
14181     }
14182     if (options & XML_PARSE_NOERROR) {
14183         ctxt->sax->error = NULL;
14184         ctxt->sax->fatalError = NULL;
14185         options -= XML_PARSE_NOERROR;
14186     }
14187 #ifdef LIBXML_SAX1_ENABLED
14188     if (options & XML_PARSE_SAX1) {
14189         ctxt->sax->startElement = xmlSAX2StartElement;
14190         ctxt->sax->endElement = xmlSAX2EndElement;
14191         ctxt->sax->startElementNs = NULL;
14192         ctxt->sax->endElementNs = NULL;
14193         ctxt->sax->initialized = 1;
14194         options -= XML_PARSE_SAX1;
14195 	ctxt->options |= XML_PARSE_SAX1;
14196     }
14197 #endif /* LIBXML_SAX1_ENABLED */
14198     if (options & XML_PARSE_NODICT) {
14199         ctxt->dictNames = 0;
14200         options -= XML_PARSE_NODICT;
14201 	ctxt->options |= XML_PARSE_NODICT;
14202     } else {
14203         ctxt->dictNames = 1;
14204     }
14205     if (options & XML_PARSE_NOCDATA) {
14206         ctxt->sax->cdataBlock = NULL;
14207         options -= XML_PARSE_NOCDATA;
14208 	ctxt->options |= XML_PARSE_NOCDATA;
14209     }
14210     if (options & XML_PARSE_NSCLEAN) {
14211 	ctxt->options |= XML_PARSE_NSCLEAN;
14212         options -= XML_PARSE_NSCLEAN;
14213     }
14214     if (options & XML_PARSE_NONET) {
14215 	ctxt->options |= XML_PARSE_NONET;
14216         options -= XML_PARSE_NONET;
14217     }
14218     if (options & XML_PARSE_COMPACT) {
14219 	ctxt->options |= XML_PARSE_COMPACT;
14220         options -= XML_PARSE_COMPACT;
14221     }
14222     if (options & XML_PARSE_OLD10) {
14223 	ctxt->options |= XML_PARSE_OLD10;
14224         options -= XML_PARSE_OLD10;
14225     }
14226     if (options & XML_PARSE_NOBASEFIX) {
14227 	ctxt->options |= XML_PARSE_NOBASEFIX;
14228         options -= XML_PARSE_NOBASEFIX;
14229     }
14230     if (options & XML_PARSE_HUGE) {
14231 	ctxt->options |= XML_PARSE_HUGE;
14232         options -= XML_PARSE_HUGE;
14233     }
14234     if (options & XML_PARSE_OLDSAX) {
14235 	ctxt->options |= XML_PARSE_OLDSAX;
14236         options -= XML_PARSE_OLDSAX;
14237     }
14238     ctxt->linenumbers = 1;
14239     return (options);
14240 }
14241 
14242 /**
14243  * xmlCtxtUseOptions:
14244  * @ctxt: an XML parser context
14245  * @options:  a combination of xmlParserOption
14246  *
14247  * Applies the options to the parser context
14248  *
14249  * Returns 0 in case of success, the set of unknown or unimplemented options
14250  *         in case of error.
14251  */
14252 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14253 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14254 {
14255    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14256 }
14257 
14258 /**
14259  * xmlDoRead:
14260  * @ctxt:  an XML parser context
14261  * @URL:  the base URL to use for the document
14262  * @encoding:  the document encoding, or NULL
14263  * @options:  a combination of xmlParserOption
14264  * @reuse:  keep the context for reuse
14265  *
14266  * Common front-end for the xmlRead functions
14267  *
14268  * Returns the resulting document tree or NULL
14269  */
14270 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14271 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14272           int options, int reuse)
14273 {
14274     xmlDocPtr ret;
14275 
14276     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14277     if (encoding != NULL) {
14278         xmlCharEncodingHandlerPtr hdlr;
14279 
14280 	hdlr = xmlFindCharEncodingHandler(encoding);
14281 	if (hdlr != NULL)
14282 	    xmlSwitchToEncoding(ctxt, hdlr);
14283     }
14284     if ((URL != NULL) && (ctxt->input != NULL) &&
14285         (ctxt->input->filename == NULL))
14286         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14287     xmlParseDocument(ctxt);
14288     if ((ctxt->wellFormed) || ctxt->recovery)
14289         ret = ctxt->myDoc;
14290     else {
14291         ret = NULL;
14292 	if (ctxt->myDoc != NULL) {
14293 	    xmlFreeDoc(ctxt->myDoc);
14294 	}
14295     }
14296     ctxt->myDoc = NULL;
14297     if (!reuse) {
14298 	xmlFreeParserCtxt(ctxt);
14299     }
14300 
14301     return (ret);
14302 }
14303 
14304 /**
14305  * xmlReadDoc:
14306  * @cur:  a pointer to a zero terminated string
14307  * @URL:  the base URL to use for the document
14308  * @encoding:  the document encoding, or NULL
14309  * @options:  a combination of xmlParserOption
14310  *
14311  * parse an XML in-memory document and build a tree.
14312  *
14313  * Returns the resulting document tree
14314  */
14315 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14316 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14317 {
14318     xmlParserCtxtPtr ctxt;
14319 
14320     if (cur == NULL)
14321         return (NULL);
14322 
14323     ctxt = xmlCreateDocParserCtxt(cur);
14324     if (ctxt == NULL)
14325         return (NULL);
14326     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14327 }
14328 
14329 /**
14330  * xmlReadFile:
14331  * @filename:  a file or URL
14332  * @encoding:  the document encoding, or NULL
14333  * @options:  a combination of xmlParserOption
14334  *
14335  * parse an XML file from the filesystem or the network.
14336  *
14337  * Returns the resulting document tree
14338  */
14339 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14340 xmlReadFile(const char *filename, const char *encoding, int options)
14341 {
14342     xmlParserCtxtPtr ctxt;
14343 
14344     ctxt = xmlCreateURLParserCtxt(filename, options);
14345     if (ctxt == NULL)
14346         return (NULL);
14347     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14348 }
14349 
14350 /**
14351  * xmlReadMemory:
14352  * @buffer:  a pointer to a char array
14353  * @size:  the size of the array
14354  * @URL:  the base URL to use for the document
14355  * @encoding:  the document encoding, or NULL
14356  * @options:  a combination of xmlParserOption
14357  *
14358  * parse an XML in-memory document and build a tree.
14359  *
14360  * Returns the resulting document tree
14361  */
14362 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14363 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14364 {
14365     xmlParserCtxtPtr ctxt;
14366 
14367     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14368     if (ctxt == NULL)
14369         return (NULL);
14370     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14371 }
14372 
14373 /**
14374  * xmlReadFd:
14375  * @fd:  an open file descriptor
14376  * @URL:  the base URL to use for the document
14377  * @encoding:  the document encoding, or NULL
14378  * @options:  a combination of xmlParserOption
14379  *
14380  * parse an XML from a file descriptor and build a tree.
14381  * NOTE that the file descriptor will not be closed when the
14382  *      reader is closed or reset.
14383  *
14384  * Returns the resulting document tree
14385  */
14386 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14387 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14388 {
14389     xmlParserCtxtPtr ctxt;
14390     xmlParserInputBufferPtr input;
14391     xmlParserInputPtr stream;
14392 
14393     if (fd < 0)
14394         return (NULL);
14395 
14396     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14397     if (input == NULL)
14398         return (NULL);
14399     input->closecallback = NULL;
14400     ctxt = xmlNewParserCtxt();
14401     if (ctxt == NULL) {
14402         xmlFreeParserInputBuffer(input);
14403         return (NULL);
14404     }
14405     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14406     if (stream == NULL) {
14407         xmlFreeParserInputBuffer(input);
14408 	xmlFreeParserCtxt(ctxt);
14409         return (NULL);
14410     }
14411     inputPush(ctxt, stream);
14412     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14413 }
14414 
14415 /**
14416  * xmlReadIO:
14417  * @ioread:  an I/O read function
14418  * @ioclose:  an I/O close function
14419  * @ioctx:  an I/O handler
14420  * @URL:  the base URL to use for the document
14421  * @encoding:  the document encoding, or NULL
14422  * @options:  a combination of xmlParserOption
14423  *
14424  * parse an XML document from I/O functions and source and build a tree.
14425  *
14426  * Returns the resulting document tree
14427  */
14428 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14429 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14430           void *ioctx, const char *URL, const char *encoding, int options)
14431 {
14432     xmlParserCtxtPtr ctxt;
14433     xmlParserInputBufferPtr input;
14434     xmlParserInputPtr stream;
14435 
14436     if (ioread == NULL)
14437         return (NULL);
14438 
14439     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14440                                          XML_CHAR_ENCODING_NONE);
14441     if (input == NULL)
14442         return (NULL);
14443     ctxt = xmlNewParserCtxt();
14444     if (ctxt == NULL) {
14445         xmlFreeParserInputBuffer(input);
14446         return (NULL);
14447     }
14448     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14449     if (stream == NULL) {
14450         xmlFreeParserInputBuffer(input);
14451 	xmlFreeParserCtxt(ctxt);
14452         return (NULL);
14453     }
14454     inputPush(ctxt, stream);
14455     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14456 }
14457 
14458 /**
14459  * xmlCtxtReadDoc:
14460  * @ctxt:  an XML parser context
14461  * @cur:  a pointer to a zero terminated string
14462  * @URL:  the base URL to use for the document
14463  * @encoding:  the document encoding, or NULL
14464  * @options:  a combination of xmlParserOption
14465  *
14466  * parse an XML in-memory document and build a tree.
14467  * This reuses the existing @ctxt parser context
14468  *
14469  * Returns the resulting document tree
14470  */
14471 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14472 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14473                const char *URL, const char *encoding, int options)
14474 {
14475     xmlParserInputPtr stream;
14476 
14477     if (cur == NULL)
14478         return (NULL);
14479     if (ctxt == NULL)
14480         return (NULL);
14481 
14482     xmlCtxtReset(ctxt);
14483 
14484     stream = xmlNewStringInputStream(ctxt, cur);
14485     if (stream == NULL) {
14486         return (NULL);
14487     }
14488     inputPush(ctxt, stream);
14489     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14490 }
14491 
14492 /**
14493  * xmlCtxtReadFile:
14494  * @ctxt:  an XML parser context
14495  * @filename:  a file or URL
14496  * @encoding:  the document encoding, or NULL
14497  * @options:  a combination of xmlParserOption
14498  *
14499  * parse an XML file from the filesystem or the network.
14500  * This reuses the existing @ctxt parser context
14501  *
14502  * Returns the resulting document tree
14503  */
14504 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14505 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14506                 const char *encoding, int options)
14507 {
14508     xmlParserInputPtr stream;
14509 
14510     if (filename == NULL)
14511         return (NULL);
14512     if (ctxt == NULL)
14513         return (NULL);
14514 
14515     xmlCtxtReset(ctxt);
14516 
14517     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14518     if (stream == NULL) {
14519         return (NULL);
14520     }
14521     inputPush(ctxt, stream);
14522     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14523 }
14524 
14525 /**
14526  * xmlCtxtReadMemory:
14527  * @ctxt:  an XML parser context
14528  * @buffer:  a pointer to a char array
14529  * @size:  the size of the array
14530  * @URL:  the base URL to use for the document
14531  * @encoding:  the document encoding, or NULL
14532  * @options:  a combination of xmlParserOption
14533  *
14534  * parse an XML in-memory document and build a tree.
14535  * This reuses the existing @ctxt parser context
14536  *
14537  * Returns the resulting document tree
14538  */
14539 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14540 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14541                   const char *URL, const char *encoding, int options)
14542 {
14543     xmlParserInputBufferPtr input;
14544     xmlParserInputPtr stream;
14545 
14546     if (ctxt == NULL)
14547         return (NULL);
14548     if (buffer == NULL)
14549         return (NULL);
14550 
14551     xmlCtxtReset(ctxt);
14552 
14553     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14554     if (input == NULL) {
14555 	return(NULL);
14556     }
14557 
14558     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14559     if (stream == NULL) {
14560 	xmlFreeParserInputBuffer(input);
14561 	return(NULL);
14562     }
14563 
14564     inputPush(ctxt, stream);
14565     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14566 }
14567 
14568 /**
14569  * xmlCtxtReadFd:
14570  * @ctxt:  an XML parser context
14571  * @fd:  an open file descriptor
14572  * @URL:  the base URL to use for the document
14573  * @encoding:  the document encoding, or NULL
14574  * @options:  a combination of xmlParserOption
14575  *
14576  * parse an XML from a file descriptor and build a tree.
14577  * This reuses the existing @ctxt parser context
14578  * NOTE that the file descriptor will not be closed when the
14579  *      reader is closed or reset.
14580  *
14581  * Returns the resulting document tree
14582  */
14583 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14584 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14585               const char *URL, const char *encoding, int options)
14586 {
14587     xmlParserInputBufferPtr input;
14588     xmlParserInputPtr stream;
14589 
14590     if (fd < 0)
14591         return (NULL);
14592     if (ctxt == NULL)
14593         return (NULL);
14594 
14595     xmlCtxtReset(ctxt);
14596 
14597 
14598     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14599     if (input == NULL)
14600         return (NULL);
14601     input->closecallback = NULL;
14602     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14603     if (stream == NULL) {
14604         xmlFreeParserInputBuffer(input);
14605         return (NULL);
14606     }
14607     inputPush(ctxt, stream);
14608     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14609 }
14610 
14611 /**
14612  * xmlCtxtReadIO:
14613  * @ctxt:  an XML parser context
14614  * @ioread:  an I/O read function
14615  * @ioclose:  an I/O close function
14616  * @ioctx:  an I/O handler
14617  * @URL:  the base URL to use for the document
14618  * @encoding:  the document encoding, or NULL
14619  * @options:  a combination of xmlParserOption
14620  *
14621  * parse an XML document from I/O functions and source and build a tree.
14622  * This reuses the existing @ctxt parser context
14623  *
14624  * Returns the resulting document tree
14625  */
14626 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14627 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14628               xmlInputCloseCallback ioclose, void *ioctx,
14629 	      const char *URL,
14630               const char *encoding, int options)
14631 {
14632     xmlParserInputBufferPtr input;
14633     xmlParserInputPtr stream;
14634 
14635     if (ioread == NULL)
14636         return (NULL);
14637     if (ctxt == NULL)
14638         return (NULL);
14639 
14640     xmlCtxtReset(ctxt);
14641 
14642     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14643                                          XML_CHAR_ENCODING_NONE);
14644     if (input == NULL)
14645         return (NULL);
14646     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14647     if (stream == NULL) {
14648         xmlFreeParserInputBuffer(input);
14649         return (NULL);
14650     }
14651     inputPush(ctxt, stream);
14652     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14653 }
14654 
14655 #define bottom_parser
14656 #include "elfgcchack.h"
14657