• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * daniel@veillard.com
31  */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82 #ifdef HAVE_LZMA_H
83 #include <lzma.h>
84 #endif
85 
86 static void
87 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
88 
89 static xmlParserCtxtPtr
90 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
91 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
92 
93 /************************************************************************
94  *									*
95  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
96  *									*
97  ************************************************************************/
98 
99 #define XML_PARSER_BIG_ENTITY 1000
100 #define XML_PARSER_LOT_ENTITY 5000
101 
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
108 #define XML_PARSER_NON_LINEAR 10
109 
110 /*
111  * xmlParserEntityCheck
112  *
113  * Function to check non-linear entity expansion behaviour
114  * This is here to detect and stop exponential linear entity expansion
115  * This is not a limitation of the parser but a safety
116  * boundary feature. It can be disabled with the XML_PARSE_HUGE
117  * parser option.
118  */
119 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)120 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
121                      xmlEntityPtr ent)
122 {
123     unsigned long consumed = 0;
124 
125     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
126         return (0);
127     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128         return (1);
129     if (size != 0) {
130         /*
131          * Do the check based on the replacement size of the entity
132          */
133         if (size < XML_PARSER_BIG_ENTITY)
134 	    return(0);
135 
136         /*
137          * A limit on the amount of text data reasonably used
138          */
139         if (ctxt->input != NULL) {
140             consumed = ctxt->input->consumed +
141                 (ctxt->input->cur - ctxt->input->base);
142         }
143         consumed += ctxt->sizeentities;
144 
145         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
147             return (0);
148     } else if (ent != NULL) {
149         /*
150          * use the number of parsed entities in the replacement
151          */
152         size = ent->checked;
153 
154         /*
155          * The amount of data parsed counting entities size only once
156          */
157         if (ctxt->input != NULL) {
158             consumed = ctxt->input->consumed +
159                 (ctxt->input->cur - ctxt->input->base);
160         }
161         consumed += ctxt->sizeentities;
162 
163         /*
164          * Check the density of entities for the amount of data
165 	 * knowing an entity reference will take at least 3 bytes
166          */
167         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168             return (0);
169     } else {
170         /*
171          * strange we got no data for checking just return
172          */
173         return (0);
174     }
175 
176     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
177     return (1);
178 }
179 
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
189 
190 
191 
192 #define SAX2 1
193 #define XML_PARSER_BIG_BUFFER_SIZE 300
194 #define XML_PARSER_BUFFER_SIZE 100
195 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
196 
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
206 
207 
208 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
209 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
210                                               const xmlChar **str);
211 
212 static xmlParserErrors
213 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
214 	              xmlSAXHandlerPtr sax,
215 		      void *user_data, int depth, const xmlChar *URL,
216 		      const xmlChar *ID, xmlNodePtr *list);
217 
218 static int
219 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
220                           const char *encoding);
221 #ifdef LIBXML_LEGACY_ENABLED
222 static void
223 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
224                       xmlNodePtr lastNode);
225 #endif /* LIBXML_LEGACY_ENABLED */
226 
227 static xmlParserErrors
228 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
229 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
230 
231 static int
232 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
233 
234 /************************************************************************
235  *									*
236  * 		Some factorized error routines				*
237  *									*
238  ************************************************************************/
239 
240 /**
241  * xmlErrAttributeDup:
242  * @ctxt:  an XML parser context
243  * @prefix:  the attribute prefix
244  * @localname:  the attribute localname
245  *
246  * Handle a redefinition of attribute error
247  */
248 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)249 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250                    const xmlChar * localname)
251 {
252     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253         (ctxt->instate == XML_PARSER_EOF))
254 	return;
255     if (ctxt != NULL)
256 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
257 
258     if (prefix == NULL)
259         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
260                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
261                         (const char *) localname, NULL, NULL, 0, 0,
262                         "Attribute %s redefined\n", localname);
263     else
264         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
265                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
266                         (const char *) prefix, (const char *) localname,
267                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268                         localname);
269     if (ctxt != NULL) {
270 	ctxt->wellFormed = 0;
271 	if (ctxt->recovery == 0)
272 	    ctxt->disableSAX = 1;
273     }
274 }
275 
276 /**
277  * xmlFatalErr:
278  * @ctxt:  an XML parser context
279  * @error:  the error number
280  * @extra:  extra information string
281  *
282  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283  */
284 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)285 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
286 {
287     const char *errmsg;
288 
289     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290         (ctxt->instate == XML_PARSER_EOF))
291 	return;
292     switch (error) {
293         case XML_ERR_INVALID_HEX_CHARREF:
294             errmsg = "CharRef: invalid hexadecimal value\n";
295             break;
296         case XML_ERR_INVALID_DEC_CHARREF:
297             errmsg = "CharRef: invalid decimal value\n";
298             break;
299         case XML_ERR_INVALID_CHARREF:
300             errmsg = "CharRef: invalid value\n";
301             break;
302         case XML_ERR_INTERNAL_ERROR:
303             errmsg = "internal error";
304             break;
305         case XML_ERR_PEREF_AT_EOF:
306             errmsg = "PEReference at end of document\n";
307             break;
308         case XML_ERR_PEREF_IN_PROLOG:
309             errmsg = "PEReference in prolog\n";
310             break;
311         case XML_ERR_PEREF_IN_EPILOG:
312             errmsg = "PEReference in epilog\n";
313             break;
314         case XML_ERR_PEREF_NO_NAME:
315             errmsg = "PEReference: no name\n";
316             break;
317         case XML_ERR_PEREF_SEMICOL_MISSING:
318             errmsg = "PEReference: expecting ';'\n";
319             break;
320         case XML_ERR_ENTITY_LOOP:
321             errmsg = "Detected an entity reference loop\n";
322             break;
323         case XML_ERR_ENTITY_NOT_STARTED:
324             errmsg = "EntityValue: \" or ' expected\n";
325             break;
326         case XML_ERR_ENTITY_PE_INTERNAL:
327             errmsg = "PEReferences forbidden in internal subset\n";
328             break;
329         case XML_ERR_ENTITY_NOT_FINISHED:
330             errmsg = "EntityValue: \" or ' expected\n";
331             break;
332         case XML_ERR_ATTRIBUTE_NOT_STARTED:
333             errmsg = "AttValue: \" or ' expected\n";
334             break;
335         case XML_ERR_LT_IN_ATTRIBUTE:
336             errmsg = "Unescaped '<' not allowed in attributes values\n";
337             break;
338         case XML_ERR_LITERAL_NOT_STARTED:
339             errmsg = "SystemLiteral \" or ' expected\n";
340             break;
341         case XML_ERR_LITERAL_NOT_FINISHED:
342             errmsg = "Unfinished System or Public ID \" or ' expected\n";
343             break;
344         case XML_ERR_MISPLACED_CDATA_END:
345             errmsg = "Sequence ']]>' not allowed in content\n";
346             break;
347         case XML_ERR_URI_REQUIRED:
348             errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349             break;
350         case XML_ERR_PUBID_REQUIRED:
351             errmsg = "PUBLIC, the Public Identifier is missing\n";
352             break;
353         case XML_ERR_HYPHEN_IN_COMMENT:
354             errmsg = "Comment must not contain '--' (double-hyphen)\n";
355             break;
356         case XML_ERR_PI_NOT_STARTED:
357             errmsg = "xmlParsePI : no target name\n";
358             break;
359         case XML_ERR_RESERVED_XML_NAME:
360             errmsg = "Invalid PI name\n";
361             break;
362         case XML_ERR_NOTATION_NOT_STARTED:
363             errmsg = "NOTATION: Name expected here\n";
364             break;
365         case XML_ERR_NOTATION_NOT_FINISHED:
366             errmsg = "'>' required to close NOTATION declaration\n";
367             break;
368         case XML_ERR_VALUE_REQUIRED:
369             errmsg = "Entity value required\n";
370             break;
371         case XML_ERR_URI_FRAGMENT:
372             errmsg = "Fragment not allowed";
373             break;
374         case XML_ERR_ATTLIST_NOT_STARTED:
375             errmsg = "'(' required to start ATTLIST enumeration\n";
376             break;
377         case XML_ERR_NMTOKEN_REQUIRED:
378             errmsg = "NmToken expected in ATTLIST enumeration\n";
379             break;
380         case XML_ERR_ATTLIST_NOT_FINISHED:
381             errmsg = "')' required to finish ATTLIST enumeration\n";
382             break;
383         case XML_ERR_MIXED_NOT_STARTED:
384             errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385             break;
386         case XML_ERR_PCDATA_REQUIRED:
387             errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388             break;
389         case XML_ERR_ELEMCONTENT_NOT_STARTED:
390             errmsg = "ContentDecl : Name or '(' expected\n";
391             break;
392         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
393             errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394             break;
395         case XML_ERR_PEREF_IN_INT_SUBSET:
396             errmsg =
397                 "PEReference: forbidden within markup decl in internal subset\n";
398             break;
399         case XML_ERR_GT_REQUIRED:
400             errmsg = "expected '>'\n";
401             break;
402         case XML_ERR_CONDSEC_INVALID:
403             errmsg = "XML conditional section '[' expected\n";
404             break;
405         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
406             errmsg = "Content error in the external subset\n";
407             break;
408         case XML_ERR_CONDSEC_INVALID_KEYWORD:
409             errmsg =
410                 "conditional section INCLUDE or IGNORE keyword expected\n";
411             break;
412         case XML_ERR_CONDSEC_NOT_FINISHED:
413             errmsg = "XML conditional section not closed\n";
414             break;
415         case XML_ERR_XMLDECL_NOT_STARTED:
416             errmsg = "Text declaration '<?xml' required\n";
417             break;
418         case XML_ERR_XMLDECL_NOT_FINISHED:
419             errmsg = "parsing XML declaration: '?>' expected\n";
420             break;
421         case XML_ERR_EXT_ENTITY_STANDALONE:
422             errmsg = "external parsed entities cannot be standalone\n";
423             break;
424         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
425             errmsg = "EntityRef: expecting ';'\n";
426             break;
427         case XML_ERR_DOCTYPE_NOT_FINISHED:
428             errmsg = "DOCTYPE improperly terminated\n";
429             break;
430         case XML_ERR_LTSLASH_REQUIRED:
431             errmsg = "EndTag: '</' not found\n";
432             break;
433         case XML_ERR_EQUAL_REQUIRED:
434             errmsg = "expected '='\n";
435             break;
436         case XML_ERR_STRING_NOT_CLOSED:
437             errmsg = "String not closed expecting \" or '\n";
438             break;
439         case XML_ERR_STRING_NOT_STARTED:
440             errmsg = "String not started expecting ' or \"\n";
441             break;
442         case XML_ERR_ENCODING_NAME:
443             errmsg = "Invalid XML encoding name\n";
444             break;
445         case XML_ERR_STANDALONE_VALUE:
446             errmsg = "standalone accepts only 'yes' or 'no'\n";
447             break;
448         case XML_ERR_DOCUMENT_EMPTY:
449             errmsg = "Document is empty\n";
450             break;
451         case XML_ERR_DOCUMENT_END:
452             errmsg = "Extra content at the end of the document\n";
453             break;
454         case XML_ERR_NOT_WELL_BALANCED:
455             errmsg = "chunk is not well balanced\n";
456             break;
457         case XML_ERR_EXTRA_CONTENT:
458             errmsg = "extra content at the end of well balanced chunk\n";
459             break;
460         case XML_ERR_VERSION_MISSING:
461             errmsg = "Malformed declaration expecting version\n";
462             break;
463 #if 0
464         case:
465             errmsg = "\n";
466             break;
467 #endif
468         default:
469             errmsg = "Unregistered error message\n";
470     }
471     if (ctxt != NULL)
472 	ctxt->errNo = error;
473     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
474                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475                     info);
476     if (ctxt != NULL) {
477 	ctxt->wellFormed = 0;
478 	if (ctxt->recovery == 0)
479 	    ctxt->disableSAX = 1;
480     }
481 }
482 
483 /**
484  * xmlFatalErrMsg:
485  * @ctxt:  an XML parser context
486  * @error:  the error number
487  * @msg:  the error message
488  *
489  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490  */
491 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)492 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493                const char *msg)
494 {
495     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496         (ctxt->instate == XML_PARSER_EOF))
497 	return;
498     if (ctxt != NULL)
499 	ctxt->errNo = error;
500     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
501                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
502     if (ctxt != NULL) {
503 	ctxt->wellFormed = 0;
504 	if (ctxt->recovery == 0)
505 	    ctxt->disableSAX = 1;
506     }
507 }
508 
509 /**
510  * xmlWarningMsg:
511  * @ctxt:  an XML parser context
512  * @error:  the error number
513  * @msg:  the error message
514  * @str1:  extra data
515  * @str2:  extra data
516  *
517  * Handle a warning.
518  */
519 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)520 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521               const char *msg, const xmlChar *str1, const xmlChar *str2)
522 {
523     xmlStructuredErrorFunc schannel = NULL;
524 
525     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526         (ctxt->instate == XML_PARSER_EOF))
527 	return;
528     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529         (ctxt->sax->initialized == XML_SAX2_MAGIC))
530         schannel = ctxt->sax->serror;
531     if (ctxt != NULL) {
532         __xmlRaiseError(schannel,
533                     (ctxt->sax) ? ctxt->sax->warning : NULL,
534                     ctxt->userData,
535                     ctxt, NULL, XML_FROM_PARSER, error,
536                     XML_ERR_WARNING, NULL, 0,
537 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
538 		    msg, (const char *) str1, (const char *) str2);
539     } else {
540         __xmlRaiseError(schannel, NULL, NULL,
541                     ctxt, NULL, XML_FROM_PARSER, error,
542                     XML_ERR_WARNING, NULL, 0,
543 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
544 		    msg, (const char *) str1, (const char *) str2);
545     }
546 }
547 
548 /**
549  * xmlValidityError:
550  * @ctxt:  an XML parser context
551  * @error:  the error number
552  * @msg:  the error message
553  * @str1:  extra data
554  *
555  * Handle a validity error.
556  */
557 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)558 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
559               const char *msg, const xmlChar *str1, const xmlChar *str2)
560 {
561     xmlStructuredErrorFunc schannel = NULL;
562 
563     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564         (ctxt->instate == XML_PARSER_EOF))
565 	return;
566     if (ctxt != NULL) {
567 	ctxt->errNo = error;
568 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 	    schannel = ctxt->sax->serror;
570     }
571     if (ctxt != NULL) {
572         __xmlRaiseError(schannel,
573                     ctxt->vctxt.error, ctxt->vctxt.userData,
574                     ctxt, NULL, XML_FROM_DTD, error,
575                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
576 		    (const char *) str2, NULL, 0, 0,
577 		    msg, (const char *) str1, (const char *) str2);
578 	ctxt->valid = 0;
579     } else {
580         __xmlRaiseError(schannel, NULL, NULL,
581                     ctxt, NULL, XML_FROM_DTD, error,
582                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
583 		    (const char *) str2, NULL, 0, 0,
584 		    msg, (const char *) str1, (const char *) str2);
585     }
586 }
587 
588 /**
589  * xmlFatalErrMsgInt:
590  * @ctxt:  an XML parser context
591  * @error:  the error number
592  * @msg:  the error message
593  * @val:  an integer value
594  *
595  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596  */
597 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)598 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599                   const char *msg, int val)
600 {
601     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602         (ctxt->instate == XML_PARSER_EOF))
603 	return;
604     if (ctxt != NULL)
605 	ctxt->errNo = error;
606     __xmlRaiseError(NULL, NULL, NULL,
607                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
609     if (ctxt != NULL) {
610 	ctxt->wellFormed = 0;
611 	if (ctxt->recovery == 0)
612 	    ctxt->disableSAX = 1;
613     }
614 }
615 
616 /**
617  * xmlFatalErrMsgStrIntStr:
618  * @ctxt:  an XML parser context
619  * @error:  the error number
620  * @msg:  the error message
621  * @str1:  an string info
622  * @val:  an integer value
623  * @str2:  an string info
624  *
625  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626  */
627 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)628 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629                   const char *msg, const xmlChar *str1, int val,
630 		  const xmlChar *str2)
631 {
632     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633         (ctxt->instate == XML_PARSER_EOF))
634 	return;
635     if (ctxt != NULL)
636 	ctxt->errNo = error;
637     __xmlRaiseError(NULL, NULL, NULL,
638                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639                     NULL, 0, (const char *) str1, (const char *) str2,
640 		    NULL, val, 0, msg, str1, val, str2);
641     if (ctxt != NULL) {
642 	ctxt->wellFormed = 0;
643 	if (ctxt->recovery == 0)
644 	    ctxt->disableSAX = 1;
645     }
646 }
647 
648 /**
649  * xmlFatalErrMsgStr:
650  * @ctxt:  an XML parser context
651  * @error:  the error number
652  * @msg:  the error message
653  * @val:  a string value
654  *
655  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
656  */
657 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)658 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659                   const char *msg, const xmlChar * val)
660 {
661     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662         (ctxt->instate == XML_PARSER_EOF))
663 	return;
664     if (ctxt != NULL)
665 	ctxt->errNo = error;
666     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
667                     XML_FROM_PARSER, error, XML_ERR_FATAL,
668                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
669                     val);
670     if (ctxt != NULL) {
671 	ctxt->wellFormed = 0;
672 	if (ctxt->recovery == 0)
673 	    ctxt->disableSAX = 1;
674     }
675 }
676 
677 /**
678  * xmlErrMsgStr:
679  * @ctxt:  an XML parser context
680  * @error:  the error number
681  * @msg:  the error message
682  * @val:  a string value
683  *
684  * Handle a non fatal parser error
685  */
686 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)687 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688                   const char *msg, const xmlChar * val)
689 {
690     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691         (ctxt->instate == XML_PARSER_EOF))
692 	return;
693     if (ctxt != NULL)
694 	ctxt->errNo = error;
695     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
696                     XML_FROM_PARSER, error, XML_ERR_ERROR,
697                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
698                     val);
699 }
700 
701 /**
702  * xmlNsErr:
703  * @ctxt:  an XML parser context
704  * @error:  the error number
705  * @msg:  the message
706  * @info1:  extra information string
707  * @info2:  extra information string
708  *
709  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
710  */
711 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)712 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
713          const char *msg,
714          const xmlChar * info1, const xmlChar * info2,
715          const xmlChar * info3)
716 {
717     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718         (ctxt->instate == XML_PARSER_EOF))
719 	return;
720     if (ctxt != NULL)
721 	ctxt->errNo = error;
722     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
723                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
724                     (const char *) info2, (const char *) info3, 0, 0, msg,
725                     info1, info2, info3);
726     if (ctxt != NULL)
727 	ctxt->nsWellFormed = 0;
728 }
729 
730 /**
731  * xmlNsWarn
732  * @ctxt:  an XML parser context
733  * @error:  the error number
734  * @msg:  the message
735  * @info1:  extra information string
736  * @info2:  extra information string
737  *
738  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739  */
740 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)741 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742          const char *msg,
743          const xmlChar * info1, const xmlChar * info2,
744          const xmlChar * info3)
745 {
746     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747         (ctxt->instate == XML_PARSER_EOF))
748 	return;
749     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
751                     (const char *) info2, (const char *) info3, 0, 0, msg,
752                     info1, info2, info3);
753 }
754 
755 /************************************************************************
756  *									*
757  * 		Library wide options					*
758  *									*
759  ************************************************************************/
760 
761 /**
762   * xmlHasFeature:
763   * @feature: the feature to be examined
764   *
765   * Examines if the library has been compiled with a given feature.
766   *
767   * Returns a non-zero value if the feature exist, otherwise zero.
768   * Returns zero (0) if the feature does not exist or an unknown
769   * unknown feature is requested, non-zero otherwise.
770   */
771 int
xmlHasFeature(xmlFeature feature)772 xmlHasFeature(xmlFeature feature)
773 {
774     switch (feature) {
775 	case XML_WITH_THREAD:
776 #ifdef LIBXML_THREAD_ENABLED
777 	    return(1);
778 #else
779 	    return(0);
780 #endif
781         case XML_WITH_TREE:
782 #ifdef LIBXML_TREE_ENABLED
783             return(1);
784 #else
785             return(0);
786 #endif
787         case XML_WITH_OUTPUT:
788 #ifdef LIBXML_OUTPUT_ENABLED
789             return(1);
790 #else
791             return(0);
792 #endif
793         case XML_WITH_PUSH:
794 #ifdef LIBXML_PUSH_ENABLED
795             return(1);
796 #else
797             return(0);
798 #endif
799         case XML_WITH_READER:
800 #ifdef LIBXML_READER_ENABLED
801             return(1);
802 #else
803             return(0);
804 #endif
805         case XML_WITH_PATTERN:
806 #ifdef LIBXML_PATTERN_ENABLED
807             return(1);
808 #else
809             return(0);
810 #endif
811         case XML_WITH_WRITER:
812 #ifdef LIBXML_WRITER_ENABLED
813             return(1);
814 #else
815             return(0);
816 #endif
817         case XML_WITH_SAX1:
818 #ifdef LIBXML_SAX1_ENABLED
819             return(1);
820 #else
821             return(0);
822 #endif
823         case XML_WITH_FTP:
824 #ifdef LIBXML_FTP_ENABLED
825             return(1);
826 #else
827             return(0);
828 #endif
829         case XML_WITH_HTTP:
830 #ifdef LIBXML_HTTP_ENABLED
831             return(1);
832 #else
833             return(0);
834 #endif
835         case XML_WITH_VALID:
836 #ifdef LIBXML_VALID_ENABLED
837             return(1);
838 #else
839             return(0);
840 #endif
841         case XML_WITH_HTML:
842 #ifdef LIBXML_HTML_ENABLED
843             return(1);
844 #else
845             return(0);
846 #endif
847         case XML_WITH_LEGACY:
848 #ifdef LIBXML_LEGACY_ENABLED
849             return(1);
850 #else
851             return(0);
852 #endif
853         case XML_WITH_C14N:
854 #ifdef LIBXML_C14N_ENABLED
855             return(1);
856 #else
857             return(0);
858 #endif
859         case XML_WITH_CATALOG:
860 #ifdef LIBXML_CATALOG_ENABLED
861             return(1);
862 #else
863             return(0);
864 #endif
865         case XML_WITH_XPATH:
866 #ifdef LIBXML_XPATH_ENABLED
867             return(1);
868 #else
869             return(0);
870 #endif
871         case XML_WITH_XPTR:
872 #ifdef LIBXML_XPTR_ENABLED
873             return(1);
874 #else
875             return(0);
876 #endif
877         case XML_WITH_XINCLUDE:
878 #ifdef LIBXML_XINCLUDE_ENABLED
879             return(1);
880 #else
881             return(0);
882 #endif
883         case XML_WITH_ICONV:
884 #ifdef LIBXML_ICONV_ENABLED
885             return(1);
886 #else
887             return(0);
888 #endif
889         case XML_WITH_ISO8859X:
890 #ifdef LIBXML_ISO8859X_ENABLED
891             return(1);
892 #else
893             return(0);
894 #endif
895         case XML_WITH_UNICODE:
896 #ifdef LIBXML_UNICODE_ENABLED
897             return(1);
898 #else
899             return(0);
900 #endif
901         case XML_WITH_REGEXP:
902 #ifdef LIBXML_REGEXP_ENABLED
903             return(1);
904 #else
905             return(0);
906 #endif
907         case XML_WITH_AUTOMATA:
908 #ifdef LIBXML_AUTOMATA_ENABLED
909             return(1);
910 #else
911             return(0);
912 #endif
913         case XML_WITH_EXPR:
914 #ifdef LIBXML_EXPR_ENABLED
915             return(1);
916 #else
917             return(0);
918 #endif
919         case XML_WITH_SCHEMAS:
920 #ifdef LIBXML_SCHEMAS_ENABLED
921             return(1);
922 #else
923             return(0);
924 #endif
925         case XML_WITH_SCHEMATRON:
926 #ifdef LIBXML_SCHEMATRON_ENABLED
927             return(1);
928 #else
929             return(0);
930 #endif
931         case XML_WITH_MODULES:
932 #ifdef LIBXML_MODULES_ENABLED
933             return(1);
934 #else
935             return(0);
936 #endif
937         case XML_WITH_DEBUG:
938 #ifdef LIBXML_DEBUG_ENABLED
939             return(1);
940 #else
941             return(0);
942 #endif
943         case XML_WITH_DEBUG_MEM:
944 #ifdef DEBUG_MEMORY_LOCATION
945             return(1);
946 #else
947             return(0);
948 #endif
949         case XML_WITH_DEBUG_RUN:
950 #ifdef LIBXML_DEBUG_RUNTIME
951             return(1);
952 #else
953             return(0);
954 #endif
955         case XML_WITH_ZLIB:
956 #ifdef LIBXML_ZLIB_ENABLED
957             return(1);
958 #else
959             return(0);
960 #endif
961         case XML_WITH_LZMA:
962 #ifdef LIBXML_LZMA_ENABLED
963             return(1);
964 #else
965             return(0);
966 #endif
967         case XML_WITH_ICU:
968 #ifdef LIBXML_ICU_ENABLED
969             return(1);
970 #else
971             return(0);
972 #endif
973         default:
974 	    break;
975      }
976      return(0);
977 }
978 
979 /************************************************************************
980  *									*
981  * 		SAX2 defaulted attributes handling			*
982  *									*
983  ************************************************************************/
984 
985 /**
986  * xmlDetectSAX2:
987  * @ctxt:  an XML parser context
988  *
989  * Do the SAX2 detection and specific intialization
990  */
991 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)992 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993     if (ctxt == NULL) return;
994 #ifdef LIBXML_SAX1_ENABLED
995     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996         ((ctxt->sax->startElementNs != NULL) ||
997          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
998 #else
999     ctxt->sax2 = 1;
1000 #endif /* LIBXML_SAX1_ENABLED */
1001 
1002     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1005     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006     		(ctxt->str_xml_ns == NULL)) {
1007         xmlErrMemory(ctxt, NULL);
1008     }
1009 }
1010 
1011 typedef struct _xmlDefAttrs xmlDefAttrs;
1012 typedef xmlDefAttrs *xmlDefAttrsPtr;
1013 struct _xmlDefAttrs {
1014     int nbAttrs;	/* number of defaulted attributes on that element */
1015     int maxAttrs;       /* the size of the array */
1016     const xmlChar *values[5]; /* array of localname/prefix/values/external */
1017 };
1018 
1019 /**
1020  * xmlAttrNormalizeSpace:
1021  * @src: the source string
1022  * @dst: the target string
1023  *
1024  * Normalize the space in non CDATA attribute values:
1025  * If the attribute type is not CDATA, then the XML processor MUST further
1026  * process the normalized attribute value by discarding any leading and
1027  * trailing space (#x20) characters, and by replacing sequences of space
1028  * (#x20) characters by a single space (#x20) character.
1029  * Note that the size of dst need to be at least src, and if one doesn't need
1030  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1031  * passing src as dst is just fine.
1032  *
1033  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034  *         is needed.
1035  */
1036 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1037 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1038 {
1039     if ((src == NULL) || (dst == NULL))
1040         return(NULL);
1041 
1042     while (*src == 0x20) src++;
1043     while (*src != 0) {
1044 	if (*src == 0x20) {
1045 	    while (*src == 0x20) src++;
1046 	    if (*src != 0)
1047 		*dst++ = 0x20;
1048 	} else {
1049 	    *dst++ = *src++;
1050 	}
1051     }
1052     *dst = 0;
1053     if (dst == src)
1054        return(NULL);
1055     return(dst);
1056 }
1057 
1058 /**
1059  * xmlAttrNormalizeSpace2:
1060  * @src: the source string
1061  *
1062  * Normalize the space in non CDATA attribute values, a slightly more complex
1063  * front end to avoid allocation problems when running on attribute values
1064  * coming from the input.
1065  *
1066  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1067  *         is needed.
1068  */
1069 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1070 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1071 {
1072     int i;
1073     int remove_head = 0;
1074     int need_realloc = 0;
1075     const xmlChar *cur;
1076 
1077     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1078         return(NULL);
1079     i = *len;
1080     if (i <= 0)
1081         return(NULL);
1082 
1083     cur = src;
1084     while (*cur == 0x20) {
1085         cur++;
1086 	remove_head++;
1087     }
1088     while (*cur != 0) {
1089 	if (*cur == 0x20) {
1090 	    cur++;
1091 	    if ((*cur == 0x20) || (*cur == 0)) {
1092 	        need_realloc = 1;
1093 		break;
1094 	    }
1095 	} else
1096 	    cur++;
1097     }
1098     if (need_realloc) {
1099         xmlChar *ret;
1100 
1101 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1102 	if (ret == NULL) {
1103 	    xmlErrMemory(ctxt, NULL);
1104 	    return(NULL);
1105 	}
1106 	xmlAttrNormalizeSpace(ret, ret);
1107 	*len = (int) strlen((const char *)ret);
1108         return(ret);
1109     } else if (remove_head) {
1110         *len -= remove_head;
1111         memmove(src, src + remove_head, 1 + *len);
1112 	return(src);
1113     }
1114     return(NULL);
1115 }
1116 
1117 /**
1118  * xmlAddDefAttrs:
1119  * @ctxt:  an XML parser context
1120  * @fullname:  the element fullname
1121  * @fullattr:  the attribute fullname
1122  * @value:  the attribute value
1123  *
1124  * Add a defaulted attribute for an element
1125  */
1126 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1127 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128                const xmlChar *fullname,
1129                const xmlChar *fullattr,
1130                const xmlChar *value) {
1131     xmlDefAttrsPtr defaults;
1132     int len;
1133     const xmlChar *name;
1134     const xmlChar *prefix;
1135 
1136     /*
1137      * Allows to detect attribute redefinitions
1138      */
1139     if (ctxt->attsSpecial != NULL) {
1140         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1141 	    return;
1142     }
1143 
1144     if (ctxt->attsDefault == NULL) {
1145         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1146 	if (ctxt->attsDefault == NULL)
1147 	    goto mem_error;
1148     }
1149 
1150     /*
1151      * split the element name into prefix:localname , the string found
1152      * are within the DTD and then not associated to namespace names.
1153      */
1154     name = xmlSplitQName3(fullname, &len);
1155     if (name == NULL) {
1156         name = xmlDictLookup(ctxt->dict, fullname, -1);
1157 	prefix = NULL;
1158     } else {
1159         name = xmlDictLookup(ctxt->dict, name, -1);
1160 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1161     }
1162 
1163     /*
1164      * make sure there is some storage
1165      */
1166     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167     if (defaults == NULL) {
1168         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1169 	                   (4 * 5) * sizeof(const xmlChar *));
1170 	if (defaults == NULL)
1171 	    goto mem_error;
1172 	defaults->nbAttrs = 0;
1173 	defaults->maxAttrs = 4;
1174 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175 	                        defaults, NULL) < 0) {
1176 	    xmlFree(defaults);
1177 	    goto mem_error;
1178 	}
1179     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1180         xmlDefAttrsPtr temp;
1181 
1182         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1183 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1184 	if (temp == NULL)
1185 	    goto mem_error;
1186 	defaults = temp;
1187 	defaults->maxAttrs *= 2;
1188 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189 	                        defaults, NULL) < 0) {
1190 	    xmlFree(defaults);
1191 	    goto mem_error;
1192 	}
1193     }
1194 
1195     /*
1196      * Split the element name into prefix:localname , the string found
1197      * are within the DTD and hen not associated to namespace names.
1198      */
1199     name = xmlSplitQName3(fullattr, &len);
1200     if (name == NULL) {
1201         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1202 	prefix = NULL;
1203     } else {
1204         name = xmlDictLookup(ctxt->dict, name, -1);
1205 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1206     }
1207 
1208     defaults->values[5 * defaults->nbAttrs] = name;
1209     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1210     /* intern the string and precompute the end */
1211     len = xmlStrlen(value);
1212     value = xmlDictLookup(ctxt->dict, value, len);
1213     defaults->values[5 * defaults->nbAttrs + 2] = value;
1214     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1215     if (ctxt->external)
1216         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1217     else
1218         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1219     defaults->nbAttrs++;
1220 
1221     return;
1222 
1223 mem_error:
1224     xmlErrMemory(ctxt, NULL);
1225     return;
1226 }
1227 
1228 /**
1229  * xmlAddSpecialAttr:
1230  * @ctxt:  an XML parser context
1231  * @fullname:  the element fullname
1232  * @fullattr:  the attribute fullname
1233  * @type:  the attribute type
1234  *
1235  * Register this attribute type
1236  */
1237 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1238 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239 		  const xmlChar *fullname,
1240 		  const xmlChar *fullattr,
1241 		  int type)
1242 {
1243     if (ctxt->attsSpecial == NULL) {
1244         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1245 	if (ctxt->attsSpecial == NULL)
1246 	    goto mem_error;
1247     }
1248 
1249     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1250         return;
1251 
1252     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253                      (void *) (long) type);
1254     return;
1255 
1256 mem_error:
1257     xmlErrMemory(ctxt, NULL);
1258     return;
1259 }
1260 
1261 /**
1262  * xmlCleanSpecialAttrCallback:
1263  *
1264  * Removes CDATA attributes from the special attribute table
1265  */
1266 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1267 xmlCleanSpecialAttrCallback(void *payload, void *data,
1268                             const xmlChar *fullname, const xmlChar *fullattr,
1269                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1270     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1271 
1272     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1273         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1274     }
1275 }
1276 
1277 /**
1278  * xmlCleanSpecialAttr:
1279  * @ctxt:  an XML parser context
1280  *
1281  * Trim the list of attributes defined to remove all those of type
1282  * CDATA as they are not special. This call should be done when finishing
1283  * to parse the DTD and before starting to parse the document root.
1284  */
1285 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1286 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1287 {
1288     if (ctxt->attsSpecial == NULL)
1289         return;
1290 
1291     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1292 
1293     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294         xmlHashFree(ctxt->attsSpecial, NULL);
1295         ctxt->attsSpecial = NULL;
1296     }
1297     return;
1298 }
1299 
1300 /**
1301  * xmlCheckLanguageID:
1302  * @lang:  pointer to the string value
1303  *
1304  * Checks that the value conforms to the LanguageID production:
1305  *
1306  * NOTE: this is somewhat deprecated, those productions were removed from
1307  *       the XML Second edition.
1308  *
1309  * [33] LanguageID ::= Langcode ('-' Subcode)*
1310  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1311  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314  * [38] Subcode ::= ([a-z] | [A-Z])+
1315  *
1316  * The current REC reference the sucessors of RFC 1766, currently 5646
1317  *
1318  * http://www.rfc-editor.org/rfc/rfc5646.txt
1319  * langtag       = language
1320  *                 ["-" script]
1321  *                 ["-" region]
1322  *                 *("-" variant)
1323  *                 *("-" extension)
1324  *                 ["-" privateuse]
1325  * language      = 2*3ALPHA            ; shortest ISO 639 code
1326  *                 ["-" extlang]       ; sometimes followed by
1327  *                                     ; extended language subtags
1328  *               / 4ALPHA              ; or reserved for future use
1329  *               / 5*8ALPHA            ; or registered language subtag
1330  *
1331  * extlang       = 3ALPHA              ; selected ISO 639 codes
1332  *                 *2("-" 3ALPHA)      ; permanently reserved
1333  *
1334  * script        = 4ALPHA              ; ISO 15924 code
1335  *
1336  * region        = 2ALPHA              ; ISO 3166-1 code
1337  *               / 3DIGIT              ; UN M.49 code
1338  *
1339  * variant       = 5*8alphanum         ; registered variants
1340  *               / (DIGIT 3alphanum)
1341  *
1342  * extension     = singleton 1*("-" (2*8alphanum))
1343  *
1344  *                                     ; Single alphanumerics
1345  *                                     ; "x" reserved for private use
1346  * singleton     = DIGIT               ; 0 - 9
1347  *               / %x41-57             ; A - W
1348  *               / %x59-5A             ; Y - Z
1349  *               / %x61-77             ; a - w
1350  *               / %x79-7A             ; y - z
1351  *
1352  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353  * The parser below doesn't try to cope with extension or privateuse
1354  * that could be added but that's not interoperable anyway
1355  *
1356  * Returns 1 if correct 0 otherwise
1357  **/
1358 int
xmlCheckLanguageID(const xmlChar * lang)1359 xmlCheckLanguageID(const xmlChar * lang)
1360 {
1361     const xmlChar *cur = lang, *nxt;
1362 
1363     if (cur == NULL)
1364         return (0);
1365     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1366         ((cur[0] == 'I') && (cur[1] == '-')) ||
1367         ((cur[0] == 'x') && (cur[1] == '-')) ||
1368         ((cur[0] == 'X') && (cur[1] == '-'))) {
1369         /*
1370          * Still allow IANA code and user code which were coming
1371          * from the previous version of the XML-1.0 specification
1372          * it's deprecated but we should not fail
1373          */
1374         cur += 2;
1375         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1376                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1377             cur++;
1378         return(cur[0] == 0);
1379     }
1380     nxt = cur;
1381     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1383            nxt++;
1384     if (nxt - cur >= 4) {
1385         /*
1386          * Reserved
1387          */
1388         if ((nxt - cur > 8) || (nxt[0] != 0))
1389             return(0);
1390         return(1);
1391     }
1392     if (nxt - cur < 2)
1393         return(0);
1394     /* we got an ISO 639 code */
1395     if (nxt[0] == 0)
1396         return(1);
1397     if (nxt[0] != '-')
1398         return(0);
1399 
1400     nxt++;
1401     cur = nxt;
1402     /* now we can have extlang or script or region or variant */
1403     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1404         goto region_m49;
1405 
1406     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1408            nxt++;
1409     if (nxt - cur == 4)
1410         goto script;
1411     if (nxt - cur == 2)
1412         goto region;
1413     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1414         goto variant;
1415     if (nxt - cur != 3)
1416         return(0);
1417     /* we parsed an extlang */
1418     if (nxt[0] == 0)
1419         return(1);
1420     if (nxt[0] != '-')
1421         return(0);
1422 
1423     nxt++;
1424     cur = nxt;
1425     /* now we can have script or region or variant */
1426     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427         goto region_m49;
1428 
1429     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431            nxt++;
1432     if (nxt - cur == 2)
1433         goto region;
1434     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1435         goto variant;
1436     if (nxt - cur != 4)
1437         return(0);
1438     /* we parsed a script */
1439 script:
1440     if (nxt[0] == 0)
1441         return(1);
1442     if (nxt[0] != '-')
1443         return(0);
1444 
1445     nxt++;
1446     cur = nxt;
1447     /* now we can have region or variant */
1448     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1449         goto region_m49;
1450 
1451     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1453            nxt++;
1454 
1455     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456         goto variant;
1457     if (nxt - cur != 2)
1458         return(0);
1459     /* we parsed a region */
1460 region:
1461     if (nxt[0] == 0)
1462         return(1);
1463     if (nxt[0] != '-')
1464         return(0);
1465 
1466     nxt++;
1467     cur = nxt;
1468     /* now we can just have a variant */
1469     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471            nxt++;
1472 
1473     if ((nxt - cur < 5) || (nxt - cur > 8))
1474         return(0);
1475 
1476     /* we parsed a variant */
1477 variant:
1478     if (nxt[0] == 0)
1479         return(1);
1480     if (nxt[0] != '-')
1481         return(0);
1482     /* extensions and private use subtags not checked */
1483     return (1);
1484 
1485 region_m49:
1486     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1488         nxt += 3;
1489         goto region;
1490     }
1491     return(0);
1492 }
1493 
1494 /************************************************************************
1495  *									*
1496  *		Parser stacks related functions and macros		*
1497  *									*
1498  ************************************************************************/
1499 
1500 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501                                             const xmlChar ** str);
1502 
1503 #ifdef SAX2
1504 /**
1505  * nsPush:
1506  * @ctxt:  an XML parser context
1507  * @prefix:  the namespace prefix or NULL
1508  * @URL:  the namespace name
1509  *
1510  * Pushes a new parser namespace on top of the ns stack
1511  *
1512  * Returns -1 in case of error, -2 if the namespace should be discarded
1513  *	   and the index in the stack otherwise.
1514  */
1515 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1516 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1517 {
1518     if (ctxt->options & XML_PARSE_NSCLEAN) {
1519         int i;
1520 	for (i = 0;i < ctxt->nsNr;i += 2) {
1521 	    if (ctxt->nsTab[i] == prefix) {
1522 		/* in scope */
1523 	        if (ctxt->nsTab[i + 1] == URL)
1524 		    return(-2);
1525 		/* out of scope keep it */
1526 		break;
1527 	    }
1528 	}
1529     }
1530     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1531 	ctxt->nsMax = 10;
1532 	ctxt->nsNr = 0;
1533 	ctxt->nsTab = (const xmlChar **)
1534 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535 	if (ctxt->nsTab == NULL) {
1536 	    xmlErrMemory(ctxt, NULL);
1537 	    ctxt->nsMax = 0;
1538             return (-1);
1539 	}
1540     } else if (ctxt->nsNr >= ctxt->nsMax) {
1541         const xmlChar ** tmp;
1542         ctxt->nsMax *= 2;
1543         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1545         if (tmp == NULL) {
1546             xmlErrMemory(ctxt, NULL);
1547 	    ctxt->nsMax /= 2;
1548             return (-1);
1549         }
1550 	ctxt->nsTab = tmp;
1551     }
1552     ctxt->nsTab[ctxt->nsNr++] = prefix;
1553     ctxt->nsTab[ctxt->nsNr++] = URL;
1554     return (ctxt->nsNr);
1555 }
1556 /**
1557  * nsPop:
1558  * @ctxt: an XML parser context
1559  * @nr:  the number to pop
1560  *
1561  * Pops the top @nr parser prefix/namespace from the ns stack
1562  *
1563  * Returns the number of namespaces removed
1564  */
1565 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1566 nsPop(xmlParserCtxtPtr ctxt, int nr)
1567 {
1568     int i;
1569 
1570     if (ctxt->nsTab == NULL) return(0);
1571     if (ctxt->nsNr < nr) {
1572         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1573         nr = ctxt->nsNr;
1574     }
1575     if (ctxt->nsNr <= 0)
1576         return (0);
1577 
1578     for (i = 0;i < nr;i++) {
1579          ctxt->nsNr--;
1580 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1581     }
1582     return(nr);
1583 }
1584 #endif
1585 
1586 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1587 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588     const xmlChar **atts;
1589     int *attallocs;
1590     int maxatts;
1591 
1592     if (ctxt->atts == NULL) {
1593 	maxatts = 55; /* allow for 10 attrs by default */
1594 	atts = (const xmlChar **)
1595 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1596 	if (atts == NULL) goto mem_error;
1597 	ctxt->atts = atts;
1598 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599 	if (attallocs == NULL) goto mem_error;
1600 	ctxt->attallocs = attallocs;
1601 	ctxt->maxatts = maxatts;
1602     } else if (nr + 5 > ctxt->maxatts) {
1603 	maxatts = (nr + 5) * 2;
1604 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605 				     maxatts * sizeof(const xmlChar *));
1606 	if (atts == NULL) goto mem_error;
1607 	ctxt->atts = atts;
1608 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609 	                             (maxatts / 5) * sizeof(int));
1610 	if (attallocs == NULL) goto mem_error;
1611 	ctxt->attallocs = attallocs;
1612 	ctxt->maxatts = maxatts;
1613     }
1614     return(ctxt->maxatts);
1615 mem_error:
1616     xmlErrMemory(ctxt, NULL);
1617     return(-1);
1618 }
1619 
1620 /**
1621  * inputPush:
1622  * @ctxt:  an XML parser context
1623  * @value:  the parser input
1624  *
1625  * Pushes a new parser input on top of the input stack
1626  *
1627  * Returns -1 in case of error, the index in the stack otherwise
1628  */
1629 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1630 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1631 {
1632     if ((ctxt == NULL) || (value == NULL))
1633         return(-1);
1634     if (ctxt->inputNr >= ctxt->inputMax) {
1635         ctxt->inputMax *= 2;
1636         ctxt->inputTab =
1637             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1638                                              ctxt->inputMax *
1639                                              sizeof(ctxt->inputTab[0]));
1640         if (ctxt->inputTab == NULL) {
1641             xmlErrMemory(ctxt, NULL);
1642 	    xmlFreeInputStream(value);
1643 	    ctxt->inputMax /= 2;
1644 	    value = NULL;
1645             return (-1);
1646         }
1647     }
1648     ctxt->inputTab[ctxt->inputNr] = value;
1649     ctxt->input = value;
1650     return (ctxt->inputNr++);
1651 }
1652 /**
1653  * inputPop:
1654  * @ctxt: an XML parser context
1655  *
1656  * Pops the top parser input from the input stack
1657  *
1658  * Returns the input just removed
1659  */
1660 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1661 inputPop(xmlParserCtxtPtr ctxt)
1662 {
1663     xmlParserInputPtr ret;
1664 
1665     if (ctxt == NULL)
1666         return(NULL);
1667     if (ctxt->inputNr <= 0)
1668         return (NULL);
1669     ctxt->inputNr--;
1670     if (ctxt->inputNr > 0)
1671         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1672     else
1673         ctxt->input = NULL;
1674     ret = ctxt->inputTab[ctxt->inputNr];
1675     ctxt->inputTab[ctxt->inputNr] = NULL;
1676     return (ret);
1677 }
1678 /**
1679  * nodePush:
1680  * @ctxt:  an XML parser context
1681  * @value:  the element node
1682  *
1683  * Pushes a new element node on top of the node stack
1684  *
1685  * Returns -1 in case of error, the index in the stack otherwise
1686  */
1687 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1688 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1689 {
1690     if (ctxt == NULL) return(0);
1691     if (ctxt->nodeNr >= ctxt->nodeMax) {
1692         xmlNodePtr *tmp;
1693 
1694 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1695                                       ctxt->nodeMax * 2 *
1696                                       sizeof(ctxt->nodeTab[0]));
1697         if (tmp == NULL) {
1698             xmlErrMemory(ctxt, NULL);
1699             return (-1);
1700         }
1701         ctxt->nodeTab = tmp;
1702 	ctxt->nodeMax *= 2;
1703     }
1704     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1706 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1707 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1708 			  xmlParserMaxDepth);
1709 	ctxt->instate = XML_PARSER_EOF;
1710 	return(-1);
1711     }
1712     ctxt->nodeTab[ctxt->nodeNr] = value;
1713     ctxt->node = value;
1714     return (ctxt->nodeNr++);
1715 }
1716 
1717 /**
1718  * nodePop:
1719  * @ctxt: an XML parser context
1720  *
1721  * Pops the top element node from the node stack
1722  *
1723  * Returns the node just removed
1724  */
1725 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1726 nodePop(xmlParserCtxtPtr ctxt)
1727 {
1728     xmlNodePtr ret;
1729 
1730     if (ctxt == NULL) return(NULL);
1731     if (ctxt->nodeNr <= 0)
1732         return (NULL);
1733     ctxt->nodeNr--;
1734     if (ctxt->nodeNr > 0)
1735         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1736     else
1737         ctxt->node = NULL;
1738     ret = ctxt->nodeTab[ctxt->nodeNr];
1739     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1740     return (ret);
1741 }
1742 
1743 #ifdef LIBXML_PUSH_ENABLED
1744 /**
1745  * nameNsPush:
1746  * @ctxt:  an XML parser context
1747  * @value:  the element name
1748  * @prefix:  the element prefix
1749  * @URI:  the element namespace name
1750  *
1751  * Pushes a new element name/prefix/URL on top of the name stack
1752  *
1753  * Returns -1 in case of error, the index in the stack otherwise
1754  */
1755 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1756 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1758 {
1759     if (ctxt->nameNr >= ctxt->nameMax) {
1760         const xmlChar * *tmp;
1761         void **tmp2;
1762         ctxt->nameMax *= 2;
1763         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1764                                     ctxt->nameMax *
1765                                     sizeof(ctxt->nameTab[0]));
1766         if (tmp == NULL) {
1767 	    ctxt->nameMax /= 2;
1768 	    goto mem_error;
1769         }
1770 	ctxt->nameTab = tmp;
1771         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1772                                     ctxt->nameMax * 3 *
1773                                     sizeof(ctxt->pushTab[0]));
1774         if (tmp2 == NULL) {
1775 	    ctxt->nameMax /= 2;
1776 	    goto mem_error;
1777         }
1778 	ctxt->pushTab = tmp2;
1779     }
1780     ctxt->nameTab[ctxt->nameNr] = value;
1781     ctxt->name = value;
1782     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1784     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1785     return (ctxt->nameNr++);
1786 mem_error:
1787     xmlErrMemory(ctxt, NULL);
1788     return (-1);
1789 }
1790 /**
1791  * nameNsPop:
1792  * @ctxt: an XML parser context
1793  *
1794  * Pops the top element/prefix/URI name from the name stack
1795  *
1796  * Returns the name just removed
1797  */
1798 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1799 nameNsPop(xmlParserCtxtPtr ctxt)
1800 {
1801     const xmlChar *ret;
1802 
1803     if (ctxt->nameNr <= 0)
1804         return (NULL);
1805     ctxt->nameNr--;
1806     if (ctxt->nameNr > 0)
1807         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1808     else
1809         ctxt->name = NULL;
1810     ret = ctxt->nameTab[ctxt->nameNr];
1811     ctxt->nameTab[ctxt->nameNr] = NULL;
1812     return (ret);
1813 }
1814 #endif /* LIBXML_PUSH_ENABLED */
1815 
1816 /**
1817  * namePush:
1818  * @ctxt:  an XML parser context
1819  * @value:  the element name
1820  *
1821  * Pushes a new element name on top of the name stack
1822  *
1823  * Returns -1 in case of error, the index in the stack otherwise
1824  */
1825 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1826 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1827 {
1828     if (ctxt == NULL) return (-1);
1829 
1830     if (ctxt->nameNr >= ctxt->nameMax) {
1831         const xmlChar * *tmp;
1832         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1833                                     ctxt->nameMax * 2 *
1834                                     sizeof(ctxt->nameTab[0]));
1835         if (tmp == NULL) {
1836 	    goto mem_error;
1837         }
1838 	ctxt->nameTab = tmp;
1839         ctxt->nameMax *= 2;
1840     }
1841     ctxt->nameTab[ctxt->nameNr] = value;
1842     ctxt->name = value;
1843     return (ctxt->nameNr++);
1844 mem_error:
1845     xmlErrMemory(ctxt, NULL);
1846     return (-1);
1847 }
1848 /**
1849  * namePop:
1850  * @ctxt: an XML parser context
1851  *
1852  * Pops the top element name from the name stack
1853  *
1854  * Returns the name just removed
1855  */
1856 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1857 namePop(xmlParserCtxtPtr ctxt)
1858 {
1859     const xmlChar *ret;
1860 
1861     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1862         return (NULL);
1863     ctxt->nameNr--;
1864     if (ctxt->nameNr > 0)
1865         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1866     else
1867         ctxt->name = NULL;
1868     ret = ctxt->nameTab[ctxt->nameNr];
1869     ctxt->nameTab[ctxt->nameNr] = NULL;
1870     return (ret);
1871 }
1872 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to stack
 *
 * Pushes @val on top of the space stack, doubling the stack when full,
 * and points ctxt->space to the new top.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grown = ctxt->spaceMax * 2;
        int *bigger;

        bigger = (int *) xmlRealloc(ctxt->spaceTab,
                                    grown * sizeof(ctxt->spaceTab[0]));
        if (bigger == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceMax = grown;
        ctxt->spaceTab = bigger;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1891 
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below it, or to the first slot of the table when the stack
 * becomes empty. The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1904 
/*
 * Macros for accessing the content. They are meant to be used only by
 * the parser and are not exported. All of them expect a variable named
 * `ctxt` (the parser context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the current xmlChar value, i.e. an 8 bit value.
 *            It should be used internally by the parser only to compare
 *            against ASCII values, otherwise it would break when running
 *            with UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare against ASCII based substrings.
 *   SKIP(n)  skip n xmlChars; must only be used to skip ASCII defined
 *            strings without newlines within the parser.
 *   NEXT1    skip 1 xmlChar; must only be used to skip 1 non-newline
 *            ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT        skip to the next character, doing the proper decoding
 *               in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l)    skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) the current unicode character (int), sets l to the
 *               number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR   same but operates on a string instead of the context
 *   COPY_BUF    copy the current unicode char to the target buffer and
 *               increment the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*(ctxt->input->cur))
#define CUR (*(ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
1944 
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. The bytes are compared as unsigned char and the test stops at
 * the first mismatch.
 *
 * Every argument is parenthesized so that expressions (pointer
 * arithmetic, conditional expressions, ...) can be passed safely.
 * Note that s is still expanded once per compared byte, so it must not
 * have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1962 
/*
 * SKIP(val): advance the current input by val xmlChars, bumping the
 * character counter and the column by the same amount (the line number
 * is left alone). Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference, and if the new position holds a 0 and the
 * input cannot be grown the input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if (*ctxt->input->cur == 0) {					\
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
            xmlPopInput(ctxt);						\
    }									\
  } while (0)
1970 
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping line/column information up to date (a '\n' starts a new line
 * at column 1, anything else moves one column right) and counting each
 * xmlChar in nbChars. Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference, and if the new position holds a 0 and the
 * input cannot be grown the input is popped.
 *
 * val is parenthesized in the loop test so that expression arguments
 * (e.g. a conditional expression) bound the loop as intended; it is
 * evaluated on every iteration and must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1985 
1986 #define SHRINK if ((ctxt->progressive == 0) &&				\
1987 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1988 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1989 	xmlSHRINK (ctxt);
1990 
xmlSHRINK(xmlParserCtxtPtr ctxt)1991 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1992     xmlParserInputShrink(ctxt->input);
1993     if ((*ctxt->input->cur == 0) &&
1994         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1995 	    xmlPopInput(ctxt);
1996   }
1997 
1998 #define GROW if ((ctxt->progressive == 0) &&				\
1999 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
2000 	xmlGROW (ctxt);
2001 
xmlGROW(xmlParserCtxtPtr ctxt)2002 static void xmlGROW (xmlParserCtxtPtr ctxt) {
2003     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2004     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2005         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2006 	    xmlPopInput(ctxt);
2007 }
2008 
/* SKIP_BLANKS: skip blanks at the current position (xmlSkipBlankChars) */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character (xmlNextChar) */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, bumping column and character count;
 * the line number is not touched. If the new position holds a 0 an
 * attempt is made to grow the input.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long.
 * Line/column are updated for one character (a '\n' starts a new line
 * at column 1), then a '%' at the new position is handed to
 * xmlParserHandlePEReference.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the input, its length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same, but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is encoded with xmlCopyCharMultiByte.
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 * The expansion is an if/else without a trailing semicolon (the call
 * site supplies it), so it should only be used as a statement of its
 * own, preferably inside braces.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2035 
2036 /**
2037  * xmlSkipBlankChars:
2038  * @ctxt:  the XML parser context
2039  *
2040  * skip all blanks character found at that point in the input streams.
2041  * It pops up finished entities in the process if allowable at that point.
2042  *
2043  * Returns the number of space chars skipped
2044  */
2045 
2046 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2047 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2048     int res = 0;
2049 
2050     /*
2051      * It's Okay to use CUR/NEXT here since all the blanks are on
2052      * the ASCII range.
2053      */
2054     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2055 	const xmlChar *cur;
2056 	/*
2057 	 * if we are in the document content, go really fast
2058 	 */
2059 	cur = ctxt->input->cur;
2060 	while (IS_BLANK_CH(*cur)) {
2061 	    if (*cur == '\n') {
2062 		ctxt->input->line++; ctxt->input->col = 1;
2063 	    }
2064 	    cur++;
2065 	    res++;
2066 	    if (*cur == 0) {
2067 		ctxt->input->cur = cur;
2068 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069 		cur = ctxt->input->cur;
2070 	    }
2071 	}
2072 	ctxt->input->cur = cur;
2073     } else {
2074 	int cur;
2075 	do {
2076 	    cur = CUR;
2077 	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2078 		NEXT;
2079 		cur = CUR;
2080 		res++;
2081 	    }
2082 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
2083 		   (ctxt->instate != XML_PARSER_COMMENT)) {
2084 		xmlPopInput(ctxt);
2085 		cur = CUR;
2086 	    }
2087 	    /*
2088 	     * Need to handle support of entities branching here
2089 	     */
2090 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091 	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2092     }
2093     return(res);
2094 }
2095 
2096 /************************************************************************
2097  *									*
2098  *		Commodity functions to handle entities			*
2099  *									*
2100  ************************************************************************/
2101 
2102 /**
2103  * xmlPopInput:
2104  * @ctxt:  an XML parser context
2105  *
2106  * xmlPopInput: the current input pointed by ctxt->input came to an end
2107  *          pop it and return the next char.
2108  *
2109  * Returns the current xmlChar in the parser context
2110  */
2111 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2112 xmlPopInput(xmlParserCtxtPtr ctxt) {
2113     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2114     if (xmlParserDebugEntities)
2115 	xmlGenericError(xmlGenericErrorContext,
2116 		"Popping input %d\n", ctxt->inputNr);
2117     xmlFreeInputStream(inputPop(ctxt));
2118     if ((*ctxt->input->cur == 0) &&
2119         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120 	    return(xmlPopInput(ctxt));
2121     return(CUR);
2122 }
2123 
2124 /**
2125  * xmlPushInput:
2126  * @ctxt:  an XML parser context
2127  * @input:  an XML parser input fragment (entity, XML fragment ...).
2128  *
2129  * xmlPushInput: switch to a new input stream which is stacked on top
2130  *               of the previous one(s).
2131  * Returns -1 in case of error or the index in the input stack
2132  */
2133 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2134 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2135     int ret;
2136     if (input == NULL) return(-1);
2137 
2138     if (xmlParserDebugEntities) {
2139 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2140 	    xmlGenericError(xmlGenericErrorContext,
2141 		    "%s(%d): ", ctxt->input->filename,
2142 		    ctxt->input->line);
2143 	xmlGenericError(xmlGenericErrorContext,
2144 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2145     }
2146     ret = inputPush(ctxt, input);
2147     GROW;
2148     return(ret);
2149 }
2150 
2151 /**
2152  * xmlParseCharRef:
2153  * @ctxt:  an XML parser context
2154  *
2155  * parse Reference declarations
2156  *
2157  * [66] CharRef ::= '&#' [0-9]+ ';' |
2158  *                  '&#x' [0-9a-fA-F]+ ';'
2159  *
2160  * [ WFC: Legal Character ]
2161  * Characters referred to using character references must match the
2162  * production for Char.
2163  *
2164  * Returns the value parsed (as an int), 0 in case of error
2165  */
2166 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2167 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2168     unsigned int val = 0;
2169     int count = 0;
2170     unsigned int outofrange = 0;
2171 
2172     /*
2173      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2174      */
2175     if ((RAW == '&') && (NXT(1) == '#') &&
2176         (NXT(2) == 'x')) {
2177 	SKIP(3);
2178 	GROW;
2179 	while (RAW != ';') { /* loop blocked by count */
2180 	    if (count++ > 20) {
2181 		count = 0;
2182 		GROW;
2183 	    }
2184 	    if ((RAW >= '0') && (RAW <= '9'))
2185 	        val = val * 16 + (CUR - '0');
2186 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187 	        val = val * 16 + (CUR - 'a') + 10;
2188 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189 	        val = val * 16 + (CUR - 'A') + 10;
2190 	    else {
2191 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2192 		val = 0;
2193 		break;
2194 	    }
2195 	    if (val > 0x10FFFF)
2196 	        outofrange = val;
2197 
2198 	    NEXT;
2199 	    count++;
2200 	}
2201 	if (RAW == ';') {
2202 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2203 	    ctxt->input->col++;
2204 	    ctxt->nbChars ++;
2205 	    ctxt->input->cur++;
2206 	}
2207     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2208 	SKIP(2);
2209 	GROW;
2210 	while (RAW != ';') { /* loop blocked by count */
2211 	    if (count++ > 20) {
2212 		count = 0;
2213 		GROW;
2214 	    }
2215 	    if ((RAW >= '0') && (RAW <= '9'))
2216 	        val = val * 10 + (CUR - '0');
2217 	    else {
2218 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2219 		val = 0;
2220 		break;
2221 	    }
2222 	    if (val > 0x10FFFF)
2223 	        outofrange = val;
2224 
2225 	    NEXT;
2226 	    count++;
2227 	}
2228 	if (RAW == ';') {
2229 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2230 	    ctxt->input->col++;
2231 	    ctxt->nbChars ++;
2232 	    ctxt->input->cur++;
2233 	}
2234     } else {
2235         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2236     }
2237 
2238     /*
2239      * [ WFC: Legal Character ]
2240      * Characters referred to using character references must match the
2241      * production for Char.
2242      */
2243     if ((IS_CHAR(val) && (outofrange == 0))) {
2244         return(val);
2245     } else {
2246         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247                           "xmlParseCharRef: invalid xmlChar value %d\n",
2248 	                  val);
2249     }
2250     return(0);
2251 }
2252 
2253 /**
2254  * xmlParseStringCharRef:
2255  * @ctxt:  an XML parser context
2256  * @str:  a pointer to an index in the string
2257  *
2258  * parse Reference declarations, variant parsing from a string rather
2259  * than an an input flow.
2260  *
2261  * [66] CharRef ::= '&#' [0-9]+ ';' |
2262  *                  '&#x' [0-9a-fA-F]+ ';'
2263  *
2264  * [ WFC: Legal Character ]
2265  * Characters referred to using character references must match the
2266  * production for Char.
2267  *
2268  * Returns the value parsed (as an int), 0 in case of error, str will be
2269  *         updated to the current value of the index
2270  */
2271 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2272 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2273     const xmlChar *ptr;
2274     xmlChar cur;
2275     unsigned int val = 0;
2276     unsigned int outofrange = 0;
2277 
2278     if ((str == NULL) || (*str == NULL)) return(0);
2279     ptr = *str;
2280     cur = *ptr;
2281     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2282 	ptr += 3;
2283 	cur = *ptr;
2284 	while (cur != ';') { /* Non input consuming loop */
2285 	    if ((cur >= '0') && (cur <= '9'))
2286 	        val = val * 16 + (cur - '0');
2287 	    else if ((cur >= 'a') && (cur <= 'f'))
2288 	        val = val * 16 + (cur - 'a') + 10;
2289 	    else if ((cur >= 'A') && (cur <= 'F'))
2290 	        val = val * 16 + (cur - 'A') + 10;
2291 	    else {
2292 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2293 		val = 0;
2294 		break;
2295 	    }
2296 	    if (val > 0x10FFFF)
2297 	        outofrange = val;
2298 
2299 	    ptr++;
2300 	    cur = *ptr;
2301 	}
2302 	if (cur == ';')
2303 	    ptr++;
2304     } else if  ((cur == '&') && (ptr[1] == '#')){
2305 	ptr += 2;
2306 	cur = *ptr;
2307 	while (cur != ';') { /* Non input consuming loops */
2308 	    if ((cur >= '0') && (cur <= '9'))
2309 	        val = val * 10 + (cur - '0');
2310 	    else {
2311 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2312 		val = 0;
2313 		break;
2314 	    }
2315 	    if (val > 0x10FFFF)
2316 	        outofrange = val;
2317 
2318 	    ptr++;
2319 	    cur = *ptr;
2320 	}
2321 	if (cur == ';')
2322 	    ptr++;
2323     } else {
2324 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2325 	return(0);
2326     }
2327     *str = ptr;
2328 
2329     /*
2330      * [ WFC: Legal Character ]
2331      * Characters referred to using character references must match the
2332      * production for Char.
2333      */
2334     if ((IS_CHAR(val) && (outofrange == 0))) {
2335         return(val);
2336     } else {
2337         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2339 			  val);
2340     }
2341     return(0);
2342 }
2343 
2344 /**
2345  * xmlNewBlanksWrapperInputStream:
2346  * @ctxt:  an XML parser context
2347  * @entity:  an Entity pointer
2348  *
2349  * Create a new input stream for wrapping
2350  * blanks around a PEReference
2351  *
2352  * Returns the new input stream or NULL
2353  */
2354 
deallocblankswrapper(xmlChar * str)2355 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2356 
2357 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2358 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359     xmlParserInputPtr input;
2360     xmlChar *buffer;
2361     size_t length;
2362     if (entity == NULL) {
2363 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364 	            "xmlNewBlanksWrapperInputStream entity\n");
2365 	return(NULL);
2366     }
2367     if (xmlParserDebugEntities)
2368 	xmlGenericError(xmlGenericErrorContext,
2369 		"new blanks wrapper for entity: %s\n", entity->name);
2370     input = xmlNewInputStream(ctxt);
2371     if (input == NULL) {
2372 	return(NULL);
2373     }
2374     length = xmlStrlen(entity->name) + 5;
2375     buffer = xmlMallocAtomic(length);
2376     if (buffer == NULL) {
2377 	xmlErrMemory(ctxt, NULL);
2378         xmlFree(input);
2379     	return(NULL);
2380     }
2381     buffer [0] = ' ';
2382     buffer [1] = '%';
2383     buffer [length-3] = ';';
2384     buffer [length-2] = ' ';
2385     buffer [length-1] = 0;
2386     memcpy(buffer + 2, entity->name, length - 5);
2387     input->free = deallocblankswrapper;
2388     input->base = buffer;
2389     input->cur = buffer;
2390     input->length = length;
2391     input->end = &buffer[length];
2392     return(input);
2393 }
2394 
2395 /**
2396  * xmlParserHandlePEReference:
2397  * @ctxt:  the parser context
2398  *
2399  * [69] PEReference ::= '%' Name ';'
2400  *
2401  * [ WFC: No Recursion ]
2402  * A parsed entity must not contain a recursive
2403  * reference to itself, either directly or indirectly.
2404  *
2405  * [ WFC: Entity Declared ]
2406  * In a document without any DTD, a document with only an internal DTD
2407  * subset which contains no parameter entity references, or a document
2408  * with "standalone='yes'", ...  ... The declaration of a parameter
2409  * entity must precede any reference to it...
2410  *
2411  * [ VC: Entity Declared ]
2412  * In a document with an external subset or external parameter entities
2413  * with "standalone='no'", ...  ... The declaration of a parameter entity
2414  * must precede any reference to it...
2415  *
2416  * [ WFC: In DTD ]
2417  * Parameter-entity references may only appear in the DTD.
2418  * NOTE: misleading but this is handled.
2419  *
2420  * A PEReference may have been detected in the current input stream
2421  * the handling is done accordingly to
2422  *      http://www.w3.org/TR/REC-xml#entproc
2423  * i.e.
2424  *   - Included in literal in entity values
2425  *   - Included as Parameter Entity reference within DTDs
2426  */
2427 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2428 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2429     const xmlChar *name;
2430     xmlEntityPtr entity = NULL;
2431     xmlParserInputPtr input;
2432 
2433     if (RAW != '%') return;
2434     switch(ctxt->instate) {
2435 	case XML_PARSER_CDATA_SECTION:
2436 	    return;
2437         case XML_PARSER_COMMENT:
2438 	    return;
2439 	case XML_PARSER_START_TAG:
2440 	    return;
2441 	case XML_PARSER_END_TAG:
2442 	    return;
2443         case XML_PARSER_EOF:
2444 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2445 	    return;
2446         case XML_PARSER_PROLOG:
2447 	case XML_PARSER_START:
2448 	case XML_PARSER_MISC:
2449 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2450 	    return;
2451 	case XML_PARSER_ENTITY_DECL:
2452         case XML_PARSER_CONTENT:
2453         case XML_PARSER_ATTRIBUTE_VALUE:
2454         case XML_PARSER_PI:
2455 	case XML_PARSER_SYSTEM_LITERAL:
2456 	case XML_PARSER_PUBLIC_LITERAL:
2457 	    /* we just ignore it there */
2458 	    return;
2459         case XML_PARSER_EPILOG:
2460 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2461 	    return;
2462 	case XML_PARSER_ENTITY_VALUE:
2463 	    /*
2464 	     * NOTE: in the case of entity values, we don't do the
2465 	     *       substitution here since we need the literal
2466 	     *       entity value to be able to save the internal
2467 	     *       subset of the document.
2468 	     *       This will be handled by xmlStringDecodeEntities
2469 	     */
2470 	    return;
2471         case XML_PARSER_DTD:
2472 	    /*
2473 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474 	     * In the internal DTD subset, parameter-entity references
2475 	     * can occur only where markup declarations can occur, not
2476 	     * within markup declarations.
2477 	     * In that case this is handled in xmlParseMarkupDecl
2478 	     */
2479 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2480 		return;
2481 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2482 		return;
2483             break;
2484         case XML_PARSER_IGNORE:
2485             return;
2486     }
2487 
2488     NEXT;
2489     name = xmlParseName(ctxt);
2490     if (xmlParserDebugEntities)
2491 	xmlGenericError(xmlGenericErrorContext,
2492 		"PEReference: %s\n", name);
2493     if (name == NULL) {
2494 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2495     } else {
2496 	if (RAW == ';') {
2497 	    NEXT;
2498 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500 	    if (entity == NULL) {
2501 
2502 		/*
2503 		 * [ WFC: Entity Declared ]
2504 		 * In a document without any DTD, a document with only an
2505 		 * internal DTD subset which contains no parameter entity
2506 		 * references, or a document with "standalone='yes'", ...
2507 		 * ... The declaration of a parameter entity must precede
2508 		 * any reference to it...
2509 		 */
2510 		if ((ctxt->standalone == 1) ||
2511 		    ((ctxt->hasExternalSubset == 0) &&
2512 		     (ctxt->hasPErefs == 0))) {
2513 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2514 			 "PEReference: %%%s; not found\n", name);
2515 	        } else {
2516 		    /*
2517 		     * [ VC: Entity Declared ]
2518 		     * In a document with an external subset or external
2519 		     * parameter entities with "standalone='no'", ...
2520 		     * ... The declaration of a parameter entity must precede
2521 		     * any reference to it...
2522 		     */
2523 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525 			                 "PEReference: %%%s; not found\n",
2526 				         name, NULL);
2527 		    } else
2528 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 			              "PEReference: %%%s; not found\n",
2530 				      name, NULL);
2531 		    ctxt->valid = 0;
2532 		}
2533 	    } else if (ctxt->input->free != deallocblankswrapper) {
2534 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2535 		    if (xmlPushInput(ctxt, input) < 0)
2536 		        return;
2537 	    } else {
2538 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2540 		    xmlChar start[4];
2541 		    xmlCharEncoding enc;
2542 
2543 		    /*
2544 		     * handle the extra spaces added before and after
2545 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2546 		     * this is done independently.
2547 		     */
2548 		    input = xmlNewEntityInputStream(ctxt, entity);
2549 		    if (xmlPushInput(ctxt, input) < 0)
2550 		        return;
2551 
2552 		    /*
2553 		     * Get the 4 first bytes and decode the charset
2554 		     * if enc != XML_CHAR_ENCODING_NONE
2555 		     * plug some encoding conversion routines.
2556 		     * Note that, since we may have some non-UTF8
2557 		     * encoding (like UTF16, bug 135229), the 'length'
2558 		     * is not known, but we can calculate based upon
2559 		     * the amount of data in the buffer.
2560 		     */
2561 		    GROW
2562 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2563 			start[0] = RAW;
2564 			start[1] = NXT(1);
2565 			start[2] = NXT(2);
2566 			start[3] = NXT(3);
2567 			enc = xmlDetectCharEncoding(start, 4);
2568 			if (enc != XML_CHAR_ENCODING_NONE) {
2569 			    xmlSwitchEncoding(ctxt, enc);
2570 			}
2571 		    }
2572 
2573 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2574 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575 			(IS_BLANK_CH(NXT(5)))) {
2576 			xmlParseTextDecl(ctxt);
2577 		    }
2578 		} else {
2579 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580 			     "PEReference: %s is not a parameter entity\n",
2581 				      name);
2582 		}
2583 	    }
2584 	} else {
2585 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2586 	}
2587     }
2588 }
2589 
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer, n) reallocates `buffer` to 2 * buffer_size + n
 * xmlChars, where buffer_size is the variable named after the first
 * argument with a `_size` suffix. It expects a `mem_error` label to be
 * reachable in the calling function and jumps to it when the new size
 * would not fit a signed int or when the reallocation fails; in both
 * cases `buffer` and its size variable are left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    const size_t growMax = (size_t) (((unsigned int) -1) >> 1);	\
    size_t growSize = (size_t) (n);					\
    if (((size_t) buffer##_size > growMax / 2) ||			\
        (growSize > growMax - 2 * (size_t) buffer##_size))		\
        goto mem_error;							\
    growSize += 2 * (size_t) buffer##_size;				\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, growSize * sizeof(xmlChar));		\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = (int) growSize;					\
}
2602 
2603 /**
2604  * xmlStringLenDecodeEntities:
2605  * @ctxt:  the parser context
2606  * @str:  the input string
2607  * @len: the string length
2608  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2609  * @end:  an end marker xmlChar, 0 if none
2610  * @end2:  an end marker xmlChar, 0 if none
2611  * @end3:  an end marker xmlChar, 0 if none
2612  *
2613  * Takes a entity string content and process to do the adequate substitutions.
2614  *
2615  * [67] Reference ::= EntityRef | CharRef
2616  *
2617  * [69] PEReference ::= '%' Name ';'
2618  *
2619  * Returns A newly allocated string with the substitution done. The caller
2620  *      must deallocate it !
2621  */
2622 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2623 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2624 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2625     xmlChar *buffer = NULL;
2626     int buffer_size = 0;
2627 
2628     xmlChar *current = NULL;
2629     xmlChar *rep = NULL;
2630     const xmlChar *last;
2631     xmlEntityPtr ent;
2632     int c,l;
2633     int nbchars = 0;
2634 
2635     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2636 	return(NULL);
2637     last = str + len;
2638 
2639     if (((ctxt->depth > 40) &&
2640          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2641 	(ctxt->depth > 1024)) {
2642 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2643 	return(NULL);
2644     }
2645 
2646     /*
2647      * allocate a translation buffer.
2648      */
2649     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2650     buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2651     if (buffer == NULL) goto mem_error;
2652 
2653     /*
2654      * OK loop until we reach one of the ending char or a size limit.
2655      * we are operating on already parsed values.
2656      */
2657     if (str < last)
2658 	c = CUR_SCHAR(str, l);
2659     else
2660         c = 0;
2661     while ((c != 0) && (c != end) && /* non input consuming loop */
2662 	   (c != end2) && (c != end3)) {
2663 
2664 	if (c == 0) break;
2665         if ((c == '&') && (str[1] == '#')) {
2666 	    int val = xmlParseStringCharRef(ctxt, &str);
2667 	    if (val != 0) {
2668 		COPY_BUF(0,buffer,nbchars,val);
2669 	    }
2670 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2671 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2672 	    }
2673 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2674 	    if (xmlParserDebugEntities)
2675 		xmlGenericError(xmlGenericErrorContext,
2676 			"String decoding Entity Reference: %.30s\n",
2677 			str);
2678 	    ent = xmlParseStringEntityRef(ctxt, &str);
2679 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2680 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2681 	        goto int_error;
2682 	    if (ent != NULL)
2683 	        ctxt->nbentities += ent->checked;
2684 	    if ((ent != NULL) &&
2685 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2686 		if (ent->content != NULL) {
2687 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2688 		    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2689 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2690 		    }
2691 		} else {
2692 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2693 			    "predefined entity has no content\n");
2694 		}
2695 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2696 		ctxt->depth++;
2697 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2698 			                      0, 0, 0);
2699 		ctxt->depth--;
2700 
2701 		if (rep != NULL) {
2702 		    current = rep;
2703 		    while (*current != 0) { /* non input consuming loop */
2704 			buffer[nbchars++] = *current++;
2705 			if (nbchars >
2706 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2707 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2708 				goto int_error;
2709 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2710 			}
2711 		    }
2712 		    xmlFree(rep);
2713 		    rep = NULL;
2714 		}
2715 	    } else if (ent != NULL) {
2716 		int i = xmlStrlen(ent->name);
2717 		const xmlChar *cur = ent->name;
2718 
2719 		buffer[nbchars++] = '&';
2720 		if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2721 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2722 		}
2723 		for (;i > 0;i--)
2724 		    buffer[nbchars++] = *cur++;
2725 		buffer[nbchars++] = ';';
2726 	    }
2727 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2728 	    if (xmlParserDebugEntities)
2729 		xmlGenericError(xmlGenericErrorContext,
2730 			"String decoding PE Reference: %.30s\n", str);
2731 	    ent = xmlParseStringPEReference(ctxt, &str);
2732 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2733 	        goto int_error;
2734 	    if (ent != NULL)
2735 	        ctxt->nbentities += ent->checked;
2736 	    if (ent != NULL) {
2737                 if (ent->content == NULL) {
2738 		    xmlLoadEntityContent(ctxt, ent);
2739 		}
2740 		ctxt->depth++;
2741 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2742 			                      0, 0, 0);
2743 		ctxt->depth--;
2744 		if (rep != NULL) {
2745 		    current = rep;
2746 		    while (*current != 0) { /* non input consuming loop */
2747 			buffer[nbchars++] = *current++;
2748 			if (nbchars >
2749 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2750 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2751 			        goto int_error;
2752 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2753 			}
2754 		    }
2755 		    xmlFree(rep);
2756 		    rep = NULL;
2757 		}
2758 	    }
2759 	} else {
2760 	    COPY_BUF(l,buffer,nbchars,c);
2761 	    str += l;
2762 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2763 	      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2764 	    }
2765 	}
2766 	if (str < last)
2767 	    c = CUR_SCHAR(str, l);
2768 	else
2769 	    c = 0;
2770     }
2771     buffer[nbchars] = 0;
2772     return(buffer);
2773 
2774 mem_error:
2775     xmlErrMemory(ctxt, NULL);
2776 int_error:
2777     if (rep != NULL)
2778         xmlFree(rep);
2779     if (buffer != NULL)
2780         xmlFree(buffer);
2781     return(NULL);
2782 }
2783 
2784 /**
2785  * xmlStringDecodeEntities:
2786  * @ctxt:  the parser context
2787  * @str:  the input string
2788  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789  * @end:  an end marker xmlChar, 0 if none
2790  * @end2:  an end marker xmlChar, 0 if none
2791  * @end3:  an end marker xmlChar, 0 if none
2792  *
2793  * Takes a entity string content and process to do the adequate substitutions.
2794  *
2795  * [67] Reference ::= EntityRef | CharRef
2796  *
2797  * [69] PEReference ::= '%' Name ';'
2798  *
2799  * Returns A newly allocated string with the substitution done. The caller
2800  *      must deallocate it !
2801  */
2802 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2803 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2805     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2806     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2807            end, end2, end3));
2808 }
2809 
2810 /************************************************************************
2811  *									*
2812  *		Commodity functions, cleanup needed ?			*
2813  *									*
2814  ************************************************************************/
2815 
2816 /**
2817  * areBlanks:
2818  * @ctxt:  an XML parser context
2819  * @str:  a xmlChar *
2820  * @len:  the size of @str
2821  * @blank_chars: we know the chars are blanks
2822  *
2823  * Is this a sequence of blank chars that one can ignore ?
2824  *
2825  * Returns 1 if ignorable 0 otherwise.
2826  */
2827 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2828 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2829                      int blank_chars) {
2830     int i, ret;
2831     xmlNodePtr lastChild;
2832 
2833     /*
2834      * Don't spend time trying to differentiate them, the same callback is
2835      * used !
2836      */
2837     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2838 	return(0);
2839 
2840     /*
2841      * Check for xml:space value.
2842      */
2843     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844         (*(ctxt->space) == -2))
2845 	return(0);
2846 
2847     /*
2848      * Check that the string is made of blanks
2849      */
2850     if (blank_chars == 0) {
2851 	for (i = 0;i < len;i++)
2852 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2853     }
2854 
2855     /*
2856      * Look if the element is mixed content in the DTD if available
2857      */
2858     if (ctxt->node == NULL) return(0);
2859     if (ctxt->myDoc != NULL) {
2860 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861         if (ret == 0) return(1);
2862         if (ret == 1) return(0);
2863     }
2864 
2865     /*
2866      * Otherwise, heuristic :-\
2867      */
2868     if ((RAW != '<') && (RAW != 0xD)) return(0);
2869     if ((ctxt->node->children == NULL) &&
2870 	(RAW == '<') && (NXT(1) == '/')) return(0);
2871 
2872     lastChild = xmlGetLastChild(ctxt->node);
2873     if (lastChild == NULL) {
2874         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875             (ctxt->node->content != NULL)) return(0);
2876     } else if (xmlNodeIsText(lastChild))
2877         return(0);
2878     else if ((ctxt->node->children != NULL) &&
2879              (xmlNodeIsText(ctxt->node->children)))
2880         return(0);
2881     return(1);
2882 }
2883 
2884 /************************************************************************
2885  *									*
2886  *		Extra stuff for namespace support			*
2887  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2888  *									*
2889  ************************************************************************/
2890 
2891 /**
2892  * xmlSplitQName:
2893  * @ctxt:  an XML parser context
2894  * @name:  an XML parser context
2895  * @prefix:  a xmlChar **
2896  *
2897  * parse an UTF8 encoded XML qualified name string
2898  *
2899  * [NS 5] QName ::= (Prefix ':')? LocalPart
2900  *
2901  * [NS 6] Prefix ::= NCName
2902  *
2903  * [NS 7] LocalPart ::= NCName
2904  *
2905  * Returns the local part, and prefix is updated
2906  *   to get the Prefix if any.
2907  */
2908 
2909 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2910 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911     xmlChar buf[XML_MAX_NAMELEN + 5];
2912     xmlChar *buffer = NULL;
2913     int len = 0;
2914     int max = XML_MAX_NAMELEN;
2915     xmlChar *ret = NULL;
2916     const xmlChar *cur = name;
2917     int c;
2918 
2919     if (prefix == NULL) return(NULL);
2920     *prefix = NULL;
2921 
2922     if (cur == NULL) return(NULL);
2923 
2924 #ifndef XML_XML_NAMESPACE
2925     /* xml: prefix is not really a namespace */
2926     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927         (cur[2] == 'l') && (cur[3] == ':'))
2928 	return(xmlStrdup(name));
2929 #endif
2930 
2931     /* nasty but well=formed */
2932     if (cur[0] == ':')
2933 	return(xmlStrdup(name));
2934 
2935     c = *cur++;
2936     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2937 	buf[len++] = c;
2938 	c = *cur++;
2939     }
2940     if (len >= max) {
2941 	/*
2942 	 * Okay someone managed to make a huge name, so he's ready to pay
2943 	 * for the processing speed.
2944 	 */
2945 	max = len * 2;
2946 
2947 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2948 	if (buffer == NULL) {
2949 	    xmlErrMemory(ctxt, NULL);
2950 	    return(NULL);
2951 	}
2952 	memcpy(buffer, buf, len);
2953 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954 	    if (len + 10 > max) {
2955 	        xmlChar *tmp;
2956 
2957 		max *= 2;
2958 		tmp = (xmlChar *) xmlRealloc(buffer,
2959 						max * sizeof(xmlChar));
2960 		if (tmp == NULL) {
2961 		    xmlFree(buffer);
2962 		    xmlErrMemory(ctxt, NULL);
2963 		    return(NULL);
2964 		}
2965 		buffer = tmp;
2966 	    }
2967 	    buffer[len++] = c;
2968 	    c = *cur++;
2969 	}
2970 	buffer[len] = 0;
2971     }
2972 
2973     if ((c == ':') && (*cur == 0)) {
2974         if (buffer != NULL)
2975 	    xmlFree(buffer);
2976 	*prefix = NULL;
2977 	return(xmlStrdup(name));
2978     }
2979 
2980     if (buffer == NULL)
2981 	ret = xmlStrndup(buf, len);
2982     else {
2983 	ret = buffer;
2984 	buffer = NULL;
2985 	max = XML_MAX_NAMELEN;
2986     }
2987 
2988 
2989     if (c == ':') {
2990 	c = *cur;
2991         *prefix = ret;
2992 	if (c == 0) {
2993 	    return(xmlStrndup(BAD_CAST "", 0));
2994 	}
2995 	len = 0;
2996 
2997 	/*
2998 	 * Check that the first character is proper to start
2999 	 * a new name
3000 	 */
3001 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002 	      ((c >= 0x41) && (c <= 0x5A)) ||
3003 	      (c == '_') || (c == ':'))) {
3004 	    int l;
3005 	    int first = CUR_SCHAR(cur, l);
3006 
3007 	    if (!IS_LETTER(first) && (first != '_')) {
3008 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3009 			    "Name %s is not XML Namespace compliant\n",
3010 				  name);
3011 	    }
3012 	}
3013 	cur++;
3014 
3015 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3016 	    buf[len++] = c;
3017 	    c = *cur++;
3018 	}
3019 	if (len >= max) {
3020 	    /*
3021 	     * Okay someone managed to make a huge name, so he's ready to pay
3022 	     * for the processing speed.
3023 	     */
3024 	    max = len * 2;
3025 
3026 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3027 	    if (buffer == NULL) {
3028 	        xmlErrMemory(ctxt, NULL);
3029 		return(NULL);
3030 	    }
3031 	    memcpy(buffer, buf, len);
3032 	    while (c != 0) { /* tested bigname2.xml */
3033 		if (len + 10 > max) {
3034 		    xmlChar *tmp;
3035 
3036 		    max *= 2;
3037 		    tmp = (xmlChar *) xmlRealloc(buffer,
3038 						    max * sizeof(xmlChar));
3039 		    if (tmp == NULL) {
3040 			xmlErrMemory(ctxt, NULL);
3041 			xmlFree(buffer);
3042 			return(NULL);
3043 		    }
3044 		    buffer = tmp;
3045 		}
3046 		buffer[len++] = c;
3047 		c = *cur++;
3048 	    }
3049 	    buffer[len] = 0;
3050 	}
3051 
3052 	if (buffer == NULL)
3053 	    ret = xmlStrndup(buf, len);
3054 	else {
3055 	    ret = buffer;
3056 	}
3057     }
3058 
3059     return(ret);
3060 }
3061 
3062 /************************************************************************
3063  *									*
3064  *			The parser itself				*
3065  *	Relates to http://www.w3.org/TR/REC-xml				*
3066  *									*
3067  ************************************************************************/
3068 
3069 /************************************************************************
3070  *									*
3071  *	Routines to parse Name, NCName and NmToken			*
3072  *									*
3073  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name parsing routine, compiled in DEBUG builds
 * only. Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3082 
3083 /*
3084  * The two following functions are related to the change of accepted
3085  * characters for Name and NmToken in the Revision 5 of XML-1.0
3086  * They correspond to the modified production [4] and the new production [4a]
3087  * changes in that revision. Also note that the macros used for the
3088  * productions Letter, Digit, CombiningChar and Extender are not needed
3089  * anymore.
3090  * We still keep compatibility to pre-revision5 parsing semantic if the
3091  * new XML_PARSE_OLD10 option is given to the parser.
3092  */
3093 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3094 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3096         /*
3097 	 * Use the new checks of production [4] [4a] amd [5] of the
3098 	 * Update 5 of XML-1.0
3099 	 */
3100 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101 	    (((c >= 'a') && (c <= 'z')) ||
3102 	     ((c >= 'A') && (c <= 'Z')) ||
3103 	     (c == '_') || (c == ':') ||
3104 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3105 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3106 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3107 	     ((c >= 0x370) && (c <= 0x37D)) ||
3108 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3110 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3111 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3116 	    return(1);
3117     } else {
3118         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3119 	    return(1);
3120     }
3121     return(0);
3122 }
3123 
3124 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3125 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3127         /*
3128 	 * Use the new checks of production [4] [4a] amd [5] of the
3129 	 * Update 5 of XML-1.0
3130 	 */
3131 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132 	    (((c >= 'a') && (c <= 'z')) ||
3133 	     ((c >= 'A') && (c <= 'Z')) ||
3134 	     ((c >= '0') && (c <= '9')) || /* !start */
3135 	     (c == '_') || (c == ':') ||
3136 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3138 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3139 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3140 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141 	     ((c >= 0x370) && (c <= 0x37D)) ||
3142 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3144 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3146 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3151 	     return(1);
3152     } else {
3153         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154             (c == '.') || (c == '-') ||
3155 	    (c == '_') || (c == ':') ||
3156 	    (IS_COMBINING(c)) ||
3157 	    (IS_EXTENDER(c)))
3158 	    return(1);
3159     }
3160     return(0);
3161 }
3162 
3163 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3164                                           int *len, int *alloc, int normalize);
3165 
3166 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3167 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3168     int len = 0, l;
3169     int c;
3170     int count = 0;
3171 
3172 #ifdef DEBUG
3173     nbParseNameComplex++;
3174 #endif
3175 
3176     /*
3177      * Handler for more complex cases
3178      */
3179     GROW;
3180     c = CUR_CHAR(l);
3181     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3182         /*
3183 	 * Use the new checks of production [4] [4a] amd [5] of the
3184 	 * Update 5 of XML-1.0
3185 	 */
3186 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 	    (!(((c >= 'a') && (c <= 'z')) ||
3188 	       ((c >= 'A') && (c <= 'Z')) ||
3189 	       (c == '_') || (c == ':') ||
3190 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3191 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3192 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3193 	       ((c >= 0x370) && (c <= 0x37D)) ||
3194 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3196 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3197 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3202 	    return(NULL);
3203 	}
3204 	len += l;
3205 	NEXTL(l);
3206 	c = CUR_CHAR(l);
3207 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 	       (((c >= 'a') && (c <= 'z')) ||
3209 	        ((c >= 'A') && (c <= 'Z')) ||
3210 	        ((c >= '0') && (c <= '9')) || /* !start */
3211 	        (c == '_') || (c == ':') ||
3212 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3214 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3215 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 	        ((c >= 0x370) && (c <= 0x37D)) ||
3218 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3220 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3222 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3227 		)) {
3228 	    if (count++ > 100) {
3229 		count = 0;
3230 		GROW;
3231 	    }
3232 	    len += l;
3233 	    NEXTL(l);
3234 	    c = CUR_CHAR(l);
3235 	}
3236     } else {
3237 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 	    (!IS_LETTER(c) && (c != '_') &&
3239 	     (c != ':'))) {
3240 	    return(NULL);
3241 	}
3242 	len += l;
3243 	NEXTL(l);
3244 	c = CUR_CHAR(l);
3245 
3246 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3247 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248 		(c == '.') || (c == '-') ||
3249 		(c == '_') || (c == ':') ||
3250 		(IS_COMBINING(c)) ||
3251 		(IS_EXTENDER(c)))) {
3252 	    if (count++ > 100) {
3253 		count = 0;
3254 		GROW;
3255 	    }
3256 	    len += l;
3257 	    NEXTL(l);
3258 	    c = CUR_CHAR(l);
3259 	}
3260     }
3261     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3262         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3263     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3264 }
3265 
3266 /**
3267  * xmlParseName:
3268  * @ctxt:  an XML parser context
3269  *
3270  * parse an XML name.
3271  *
3272  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273  *                  CombiningChar | Extender
3274  *
3275  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276  *
3277  * [6] Names ::= Name (#x20 Name)*
3278  *
3279  * Returns the Name parsed or NULL
3280  */
3281 
3282 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3283 xmlParseName(xmlParserCtxtPtr ctxt) {
3284     const xmlChar *in;
3285     const xmlChar *ret;
3286     int count = 0;
3287 
3288     GROW;
3289 
3290 #ifdef DEBUG
3291     nbParseName++;
3292 #endif
3293 
3294     /*
3295      * Accelerator for simple ASCII names
3296      */
3297     in = ctxt->input->cur;
3298     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3299 	((*in >= 0x41) && (*in <= 0x5A)) ||
3300 	(*in == '_') || (*in == ':')) {
3301 	in++;
3302 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3305 	       (*in == '_') || (*in == '-') ||
3306 	       (*in == ':') || (*in == '.'))
3307 	    in++;
3308 	if ((*in > 0) && (*in < 0x80)) {
3309 	    count = in - ctxt->input->cur;
3310 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3311 	    ctxt->input->cur = in;
3312 	    ctxt->nbChars += count;
3313 	    ctxt->input->col += count;
3314 	    if (ret == NULL)
3315 	        xmlErrMemory(ctxt, NULL);
3316 	    return(ret);
3317 	}
3318     }
3319     /* accelerator for special cases */
3320     return(xmlParseNameComplex(ctxt));
3321 }
3322 
3323 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3324 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3325     int len = 0, l;
3326     int c;
3327     int count = 0;
3328 
3329 #ifdef DEBUG
3330     nbParseNCNameComplex++;
3331 #endif
3332 
3333     /*
3334      * Handler for more complex cases
3335      */
3336     GROW;
3337     c = CUR_CHAR(l);
3338     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3340 	return(NULL);
3341     }
3342 
3343     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3344 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3345 	if (count++ > 100) {
3346 	    count = 0;
3347 	    GROW;
3348 	}
3349 	len += l;
3350 	NEXTL(l);
3351 	c = CUR_CHAR(l);
3352     }
3353     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3354 }
3355 
3356 /**
3357  * xmlParseNCName:
3358  * @ctxt:  an XML parser context
3359  * @len:  lenght of the string parsed
3360  *
3361  * parse an XML name.
3362  *
3363  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3364  *                      CombiningChar | Extender
3365  *
3366  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3367  *
3368  * Returns the Name parsed or NULL
3369  */
3370 
3371 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3372 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3373     const xmlChar *in;
3374     const xmlChar *ret;
3375     int count = 0;
3376 
3377 #ifdef DEBUG
3378     nbParseNCName++;
3379 #endif
3380 
3381     /*
3382      * Accelerator for simple ASCII names
3383      */
3384     in = ctxt->input->cur;
3385     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3386 	((*in >= 0x41) && (*in <= 0x5A)) ||
3387 	(*in == '_')) {
3388 	in++;
3389 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3392 	       (*in == '_') || (*in == '-') ||
3393 	       (*in == '.'))
3394 	    in++;
3395 	if ((*in > 0) && (*in < 0x80)) {
3396 	    count = in - ctxt->input->cur;
3397 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3398 	    ctxt->input->cur = in;
3399 	    ctxt->nbChars += count;
3400 	    ctxt->input->col += count;
3401 	    if (ret == NULL) {
3402 	        xmlErrMemory(ctxt, NULL);
3403 	    }
3404 	    return(ret);
3405 	}
3406     }
3407     return(xmlParseNCNameComplex(ctxt));
3408 }
3409 
3410 /**
3411  * xmlParseNameAndCompare:
3412  * @ctxt:  an XML parser context
3413  *
3414  * parse an XML name and compares for match
3415  * (specialized for endtag parsing)
3416  *
3417  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3418  * and the name for mismatch
3419  */
3420 
3421 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3422 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3423     register const xmlChar *cmp = other;
3424     register const xmlChar *in;
3425     const xmlChar *ret;
3426 
3427     GROW;
3428 
3429     in = ctxt->input->cur;
3430     while (*in != 0 && *in == *cmp) {
3431 	++in;
3432 	++cmp;
3433 	ctxt->input->col++;
3434     }
3435     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3436 	/* success */
3437 	ctxt->input->cur = in;
3438 	return (const xmlChar*) 1;
3439     }
3440     /* failure (or end of input buffer), check with full function */
3441     ret = xmlParseName (ctxt);
3442     /* strings coming from the dictionnary direct compare possible */
3443     if (ret == other) {
3444 	return (const xmlChar*) 1;
3445     }
3446     return ret;
3447 }
3448 
3449 /**
3450  * xmlParseStringName:
3451  * @ctxt:  an XML parser context
3452  * @str:  a pointer to the string pointer (IN/OUT)
3453  *
3454  * parse an XML name.
3455  *
3456  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457  *                  CombiningChar | Extender
3458  *
3459  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460  *
3461  * [6] Names ::= Name (#x20 Name)*
3462  *
3463  * Returns the Name parsed or NULL. The @str pointer
3464  * is updated to the current location in the string.
3465  */
3466 
3467 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3468 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469     xmlChar buf[XML_MAX_NAMELEN + 5];
3470     const xmlChar *cur = *str;
3471     int len = 0, l;
3472     int c;
3473 
3474 #ifdef DEBUG
3475     nbParseStringName++;
3476 #endif
3477 
3478     c = CUR_SCHAR(cur, l);
3479     if (!xmlIsNameStartChar(ctxt, c)) {
3480 	return(NULL);
3481     }
3482 
3483     COPY_BUF(l,buf,len,c);
3484     cur += l;
3485     c = CUR_SCHAR(cur, l);
3486     while (xmlIsNameChar(ctxt, c)) {
3487 	COPY_BUF(l,buf,len,c);
3488 	cur += l;
3489 	c = CUR_SCHAR(cur, l);
3490 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3491 	    /*
3492 	     * Okay someone managed to make a huge name, so he's ready to pay
3493 	     * for the processing speed.
3494 	     */
3495 	    xmlChar *buffer;
3496 	    int max = len * 2;
3497 
3498 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3499 	    if (buffer == NULL) {
3500 	        xmlErrMemory(ctxt, NULL);
3501 		return(NULL);
3502 	    }
3503 	    memcpy(buffer, buf, len);
3504 	    while (xmlIsNameChar(ctxt, c)) {
3505 		if (len + 10 > max) {
3506 		    xmlChar *tmp;
3507 		    max *= 2;
3508 		    tmp = (xmlChar *) xmlRealloc(buffer,
3509 			                            max * sizeof(xmlChar));
3510 		    if (tmp == NULL) {
3511 			xmlErrMemory(ctxt, NULL);
3512 			xmlFree(buffer);
3513 			return(NULL);
3514 		    }
3515 		    buffer = tmp;
3516 		}
3517 		COPY_BUF(l,buffer,len,c);
3518 		cur += l;
3519 		c = CUR_SCHAR(cur, l);
3520 	    }
3521 	    buffer[len] = 0;
3522 	    *str = cur;
3523 	    return(buffer);
3524 	}
3525     }
3526     *str = cur;
3527     return(xmlStrndup(buf, len));
3528 }
3529 
3530 /**
3531  * xmlParseNmtoken:
3532  * @ctxt:  an XML parser context
3533  *
3534  * parse an XML Nmtoken.
3535  *
3536  * [7] Nmtoken ::= (NameChar)+
3537  *
3538  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3539  *
3540  * Returns the Nmtoken parsed or NULL
3541  */
3542 
3543 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3544 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545     xmlChar buf[XML_MAX_NAMELEN + 5];
3546     int len = 0, l;
3547     int c;
3548     int count = 0;
3549 
3550 #ifdef DEBUG
3551     nbParseNmToken++;
3552 #endif
3553 
3554     GROW;
3555     c = CUR_CHAR(l);
3556 
3557     while (xmlIsNameChar(ctxt, c)) {
3558 	if (count++ > 100) {
3559 	    count = 0;
3560 	    GROW;
3561 	}
3562 	COPY_BUF(l,buf,len,c);
3563 	NEXTL(l);
3564 	c = CUR_CHAR(l);
3565 	if (len >= XML_MAX_NAMELEN) {
3566 	    /*
3567 	     * Okay someone managed to make a huge token, so he's ready to pay
3568 	     * for the processing speed.
3569 	     */
3570 	    xmlChar *buffer;
3571 	    int max = len * 2;
3572 
3573 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3574 	    if (buffer == NULL) {
3575 	        xmlErrMemory(ctxt, NULL);
3576 		return(NULL);
3577 	    }
3578 	    memcpy(buffer, buf, len);
3579 	    while (xmlIsNameChar(ctxt, c)) {
3580 		if (count++ > 100) {
3581 		    count = 0;
3582 		    GROW;
3583 		}
3584 		if (len + 10 > max) {
3585 		    xmlChar *tmp;
3586 
3587 		    max *= 2;
3588 		    tmp = (xmlChar *) xmlRealloc(buffer,
3589 			                            max * sizeof(xmlChar));
3590 		    if (tmp == NULL) {
3591 			xmlErrMemory(ctxt, NULL);
3592 			xmlFree(buffer);
3593 			return(NULL);
3594 		    }
3595 		    buffer = tmp;
3596 		}
3597 		COPY_BUF(l,buffer,len,c);
3598 		NEXTL(l);
3599 		c = CUR_CHAR(l);
3600 	    }
3601 	    buffer[len] = 0;
3602 	    return(buffer);
3603 	}
3604     }
3605     if (len == 0)
3606         return(NULL);
3607     return(xmlStrndup(buf, len));
3608 }
3609 
3610 /**
3611  * xmlParseEntityValue:
3612  * @ctxt:  an XML parser context
3613  * @orig:  if non-NULL store a copy of the original entity value
3614  *
3615  * parse a value for ENTITY declarations
3616  *
3617  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3619  *
3620  * Returns the EntityValue parsed with reference substituted or NULL
3621  */
3622 
3623 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3624 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625     xmlChar *buf = NULL;
3626     int len = 0;
3627     int size = XML_PARSER_BUFFER_SIZE;
3628     int c, l;
3629     xmlChar stop;
3630     xmlChar *ret = NULL;
3631     const xmlChar *cur = NULL;
3632     xmlParserInputPtr input;
3633 
3634     if (RAW == '"') stop = '"';
3635     else if (RAW == '\'') stop = '\'';
3636     else {
3637 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3638 	return(NULL);
3639     }
3640     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3641     if (buf == NULL) {
3642 	xmlErrMemory(ctxt, NULL);
3643 	return(NULL);
3644     }
3645 
3646     /*
3647      * The content of the entity definition is copied in a buffer.
3648      */
3649 
3650     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651     input = ctxt->input;
3652     GROW;
3653     NEXT;
3654     c = CUR_CHAR(l);
3655     /*
3656      * NOTE: 4.4.5 Included in Literal
3657      * When a parameter entity reference appears in a literal entity
3658      * value, ... a single or double quote character in the replacement
3659      * text is always treated as a normal data character and will not
3660      * terminate the literal.
3661      * In practice it means we stop the loop only when back at parsing
3662      * the initial entity and the quote is found
3663      */
3664     while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3665 	   (ctxt->input != input))) {
3666 	if (len + 5 >= size) {
3667 	    xmlChar *tmp;
3668 
3669 	    size *= 2;
3670 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3671 	    if (tmp == NULL) {
3672 		xmlErrMemory(ctxt, NULL);
3673 		xmlFree(buf);
3674 		return(NULL);
3675 	    }
3676 	    buf = tmp;
3677 	}
3678 	COPY_BUF(l,buf,len,c);
3679 	NEXTL(l);
3680 	/*
3681 	 * Pop-up of finished entities.
3682 	 */
3683 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3684 	    xmlPopInput(ctxt);
3685 
3686 	GROW;
3687 	c = CUR_CHAR(l);
3688 	if (c == 0) {
3689 	    GROW;
3690 	    c = CUR_CHAR(l);
3691 	}
3692     }
3693     buf[len] = 0;
3694 
3695     /*
3696      * Raise problem w.r.t. '&' and '%' being used in non-entities
3697      * reference constructs. Note Charref will be handled in
3698      * xmlStringDecodeEntities()
3699      */
3700     cur = buf;
3701     while (*cur != 0) { /* non input consuming */
3702 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3703 	    xmlChar *name;
3704 	    xmlChar tmp = *cur;
3705 
3706 	    cur++;
3707 	    name = xmlParseStringName(ctxt, &cur);
3708             if ((name == NULL) || (*cur != ';')) {
3709 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3710 	    "EntityValue: '%c' forbidden except for entities references\n",
3711 	                          tmp);
3712 	    }
3713 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714 		(ctxt->inputNr == 1)) {
3715 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3716 	    }
3717 	    if (name != NULL)
3718 		xmlFree(name);
3719 	    if (*cur == 0)
3720 	        break;
3721 	}
3722 	cur++;
3723     }
3724 
3725     /*
3726      * Then PEReference entities are substituted.
3727      */
3728     if (c != stop) {
3729 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3730 	xmlFree(buf);
3731     } else {
3732 	NEXT;
3733 	/*
3734 	 * NOTE: 4.4.7 Bypassed
3735 	 * When a general entity reference appears in the EntityValue in
3736 	 * an entity declaration, it is bypassed and left as is.
3737 	 * so XML_SUBSTITUTE_REF is not set here.
3738 	 */
3739 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3740 				      0, 0, 0);
3741 	if (orig != NULL)
3742 	    *orig = buf;
3743 	else
3744 	    xmlFree(buf);
3745     }
3746 
3747     return(ret);
3748 }
3749 
3750 /**
3751  * xmlParseAttValueComplex:
3752  * @ctxt:  an XML parser context
3753  * @len:   the resulting attribute len
3754  * @normalize:  wether to apply the inner normalization
3755  *
3756  * parse a value for an attribute, this is the fallback function
3757  * of xmlParseAttValue() when the attribute parsing requires handling
3758  * of non-ASCII characters, or normalization compaction.
3759  *
3760  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761  */
3762 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3763 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3764     xmlChar limit = 0;
3765     xmlChar *buf = NULL;
3766     xmlChar *rep = NULL;
3767     int len = 0;
3768     int buf_size = 0;
3769     int c, l, in_space = 0;
3770     xmlChar *current = NULL;
3771     xmlEntityPtr ent;
3772 
3773     if (NXT(0) == '"') {
3774 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3775 	limit = '"';
3776         NEXT;
3777     } else if (NXT(0) == '\'') {
3778 	limit = '\'';
3779 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3780         NEXT;
3781     } else {
3782 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3783 	return(NULL);
3784     }
3785 
3786     /*
3787      * allocate a translation buffer.
3788      */
3789     buf_size = XML_PARSER_BUFFER_SIZE;
3790     buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3791     if (buf == NULL) goto mem_error;
3792 
3793     /*
3794      * OK loop until we reach one of the ending char or a size limit.
3795      */
3796     c = CUR_CHAR(l);
3797     while ((NXT(0) != limit) && /* checked */
3798            (IS_CHAR(c)) && (c != '<')) {
3799 	if (c == 0) break;
3800 	if (c == '&') {
3801 	    in_space = 0;
3802 	    if (NXT(1) == '#') {
3803 		int val = xmlParseCharRef(ctxt);
3804 
3805 		if (val == '&') {
3806 		    if (ctxt->replaceEntities) {
3807 			if (len > buf_size - 10) {
3808 			    growBuffer(buf, 10);
3809 			}
3810 			buf[len++] = '&';
3811 		    } else {
3812 			/*
3813 			 * The reparsing will be done in xmlStringGetNodeList()
3814 			 * called by the attribute() function in SAX.c
3815 			 */
3816 			if (len > buf_size - 10) {
3817 			    growBuffer(buf, 10);
3818 			}
3819 			buf[len++] = '&';
3820 			buf[len++] = '#';
3821 			buf[len++] = '3';
3822 			buf[len++] = '8';
3823 			buf[len++] = ';';
3824 		    }
3825 		} else if (val != 0) {
3826 		    if (len > buf_size - 10) {
3827 			growBuffer(buf, 10);
3828 		    }
3829 		    len += xmlCopyChar(0, &buf[len], val);
3830 		}
3831 	    } else {
3832 		ent = xmlParseEntityRef(ctxt);
3833 		ctxt->nbentities++;
3834 		if (ent != NULL)
3835 		    ctxt->nbentities += ent->owner;
3836 		if ((ent != NULL) &&
3837 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838 		    if (len > buf_size - 10) {
3839 			growBuffer(buf, 10);
3840 		    }
3841 		    if ((ctxt->replaceEntities == 0) &&
3842 		        (ent->content[0] == '&')) {
3843 			buf[len++] = '&';
3844 			buf[len++] = '#';
3845 			buf[len++] = '3';
3846 			buf[len++] = '8';
3847 			buf[len++] = ';';
3848 		    } else {
3849 			buf[len++] = ent->content[0];
3850 		    }
3851 		} else if ((ent != NULL) &&
3852 		           (ctxt->replaceEntities != 0)) {
3853 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3855 						      XML_SUBSTITUTE_REF,
3856 						      0, 0, 0);
3857 			if (rep != NULL) {
3858 			    current = rep;
3859 			    while (*current != 0) { /* non input consuming */
3860                                 if ((*current == 0xD) || (*current == 0xA) ||
3861                                     (*current == 0x9)) {
3862                                     buf[len++] = 0x20;
3863                                     current++;
3864                                 } else
3865                                     buf[len++] = *current++;
3866 				if (len > buf_size - 10) {
3867 				    growBuffer(buf, 10);
3868 				}
3869 			    }
3870 			    xmlFree(rep);
3871 			    rep = NULL;
3872 			}
3873 		    } else {
3874 			if (len > buf_size - 10) {
3875 			    growBuffer(buf, 10);
3876 			}
3877 			if (ent->content != NULL)
3878 			    buf[len++] = ent->content[0];
3879 		    }
3880 		} else if (ent != NULL) {
3881 		    int i = xmlStrlen(ent->name);
3882 		    const xmlChar *cur = ent->name;
3883 
3884 		    /*
3885 		     * This may look absurd but is needed to detect
3886 		     * entities problems
3887 		     */
3888 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889 			(ent->content != NULL)) {
3890 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3891 						  XML_SUBSTITUTE_REF, 0, 0, 0);
3892 			if (rep != NULL) {
3893 			    xmlFree(rep);
3894 			    rep = NULL;
3895 			}
3896 		    }
3897 
3898 		    /*
3899 		     * Just output the reference
3900 		     */
3901 		    buf[len++] = '&';
3902 		    while (len > buf_size - i - 10) {
3903 			growBuffer(buf, i + 10);
3904 		    }
3905 		    for (;i > 0;i--)
3906 			buf[len++] = *cur++;
3907 		    buf[len++] = ';';
3908 		}
3909 	    }
3910 	} else {
3911 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3912 	        if ((len != 0) || (!normalize)) {
3913 		    if ((!normalize) || (!in_space)) {
3914 			COPY_BUF(l,buf,len,0x20);
3915 			while (len > buf_size - 10) {
3916 			    growBuffer(buf, 10);
3917 			}
3918 		    }
3919 		    in_space = 1;
3920 		}
3921 	    } else {
3922 	        in_space = 0;
3923 		COPY_BUF(l,buf,len,c);
3924 		if (len > buf_size - 10) {
3925 		    growBuffer(buf, 10);
3926 		}
3927 	    }
3928 	    NEXTL(l);
3929 	}
3930 	GROW;
3931 	c = CUR_CHAR(l);
3932     }
3933     if ((in_space) && (normalize)) {
3934         while (buf[len - 1] == 0x20) len--;
3935     }
3936     buf[len] = 0;
3937     if (RAW == '<') {
3938 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3939     } else if (RAW != limit) {
3940 	if ((c != 0) && (!IS_CHAR(c))) {
3941 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942 			   "invalid character in attribute value\n");
3943 	} else {
3944 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945 			   "AttValue: ' expected\n");
3946         }
3947     } else
3948 	NEXT;
3949     if (attlen != NULL) *attlen = len;
3950     return(buf);
3951 
3952 mem_error:
3953     xmlErrMemory(ctxt, NULL);
3954     if (buf != NULL)
3955         xmlFree(buf);
3956     if (rep != NULL)
3957         xmlFree(rep);
3958     return(NULL);
3959 }
3960 
3961 /**
3962  * xmlParseAttValue:
3963  * @ctxt:  an XML parser context
3964  *
3965  * parse a value for an attribute
3966  * Note: the parser won't do substitution of entities here, this
3967  * will be handled later in xmlStringGetNodeList
3968  *
3969  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3970  *                   "'" ([^<&'] | Reference)* "'"
3971  *
3972  * 3.3.3 Attribute-Value Normalization:
3973  * Before the value of an attribute is passed to the application or
3974  * checked for validity, the XML processor must normalize it as follows:
3975  * - a character reference is processed by appending the referenced
3976  *   character to the attribute value
3977  * - an entity reference is processed by recursively processing the
3978  *   replacement text of the entity
3979  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3980  *   appending #x20 to the normalized value, except that only a single
3981  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3982  *   parsed entity or the literal entity value of an internal parsed entity
3983  * - other characters are processed by appending them to the normalized value
3984  * If the declared value is not CDATA, then the XML processor must further
3985  * process the normalized attribute value by discarding any leading and
3986  * trailing space (#x20) characters, and by replacing sequences of space
3987  * (#x20) characters by a single space (#x20) character.
3988  * All attributes for which no declaration has been read should be treated
3989  * by a non-validating parser as if declared CDATA.
3990  *
3991  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3992  */
3993 
3994 
3995 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3996 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3997     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3998     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3999 }
4000 
4001 /**
4002  * xmlParseSystemLiteral:
4003  * @ctxt:  an XML parser context
4004  *
4005  * parse an XML Literal
4006  *
4007  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4008  *
4009  * Returns the SystemLiteral parsed or NULL
4010  */
4011 
4012 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4013 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4014     xmlChar *buf = NULL;
4015     int len = 0;
4016     int size = XML_PARSER_BUFFER_SIZE;
4017     int cur, l;
4018     xmlChar stop;
4019     int state = ctxt->instate;
4020     int count = 0;
4021 
4022     SHRINK;
4023     if (RAW == '"') {
4024         NEXT;
4025 	stop = '"';
4026     } else if (RAW == '\'') {
4027         NEXT;
4028 	stop = '\'';
4029     } else {
4030 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4031 	return(NULL);
4032     }
4033 
4034     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4035     if (buf == NULL) {
4036         xmlErrMemory(ctxt, NULL);
4037 	return(NULL);
4038     }
4039     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4040     cur = CUR_CHAR(l);
4041     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4042 	if (len + 5 >= size) {
4043 	    xmlChar *tmp;
4044 
4045 	    size *= 2;
4046 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4047 	    if (tmp == NULL) {
4048 	        xmlFree(buf);
4049 		xmlErrMemory(ctxt, NULL);
4050 		ctxt->instate = (xmlParserInputState) state;
4051 		return(NULL);
4052 	    }
4053 	    buf = tmp;
4054 	}
4055 	count++;
4056 	if (count > 50) {
4057 	    GROW;
4058 	    count = 0;
4059 	}
4060 	COPY_BUF(l,buf,len,cur);
4061 	NEXTL(l);
4062 	cur = CUR_CHAR(l);
4063 	if (cur == 0) {
4064 	    GROW;
4065 	    SHRINK;
4066 	    cur = CUR_CHAR(l);
4067 	}
4068     }
4069     buf[len] = 0;
4070     ctxt->instate = (xmlParserInputState) state;
4071     if (!IS_CHAR(cur)) {
4072 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4073     } else {
4074 	NEXT;
4075     }
4076     return(buf);
4077 }
4078 
4079 /**
4080  * xmlParsePubidLiteral:
4081  * @ctxt:  an XML parser context
4082  *
4083  * parse an XML public literal
4084  *
4085  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4086  *
4087  * Returns the PubidLiteral parsed or NULL.
4088  */
4089 
4090 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4091 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4092     xmlChar *buf = NULL;
4093     int len = 0;
4094     int size = XML_PARSER_BUFFER_SIZE;
4095     xmlChar cur;
4096     xmlChar stop;
4097     int count = 0;
4098     xmlParserInputState oldstate = ctxt->instate;
4099 
4100     SHRINK;
4101     if (RAW == '"') {
4102         NEXT;
4103 	stop = '"';
4104     } else if (RAW == '\'') {
4105         NEXT;
4106 	stop = '\'';
4107     } else {
4108 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4109 	return(NULL);
4110     }
4111     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4112     if (buf == NULL) {
4113 	xmlErrMemory(ctxt, NULL);
4114 	return(NULL);
4115     }
4116     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4117     cur = CUR;
4118     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4119 	if (len + 1 >= size) {
4120 	    xmlChar *tmp;
4121 
4122 	    size *= 2;
4123 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4124 	    if (tmp == NULL) {
4125 		xmlErrMemory(ctxt, NULL);
4126 		xmlFree(buf);
4127 		return(NULL);
4128 	    }
4129 	    buf = tmp;
4130 	}
4131 	buf[len++] = cur;
4132 	count++;
4133 	if (count > 50) {
4134 	    GROW;
4135 	    count = 0;
4136 	}
4137 	NEXT;
4138 	cur = CUR;
4139 	if (cur == 0) {
4140 	    GROW;
4141 	    SHRINK;
4142 	    cur = CUR;
4143 	}
4144     }
4145     buf[len] = 0;
4146     if (cur != stop) {
4147 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4148     } else {
4149 	NEXT;
4150     }
4151     ctxt->instate = oldstate;
4152     return(buf);
4153 }
4154 
4155 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4156 
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by a raw input byte. A non-zero entry (the byte value itself)
 * means the fast scanning loop may step over the byte; a zero entry makes
 * the loop stop. Zero entries are: control characters other than 0x09
 * (so 0x0A and 0x0D are dealt with separately by the caller), '&', '<',
 * ']' and every byte above 0x7F.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4194 
4195 /**
4196  * xmlParseCharData:
4197  * @ctxt:  an XML parser context
4198  * @cdata:  int indicating whether we are within a CDATA section
4199  *
4200  * parse a CharData section.
4201  * if we are within a CDATA section ']]>' marks an end of section.
4202  *
4203  * The right angle bracket (>) may be represented using the string "&gt;",
4204  * and must, for compatibility, be escaped using "&gt;" or a character
4205  * reference when it appears in the string "]]>" in content, when that
4206  * string is not marking the end of a CDATA section.
4207  *
4208  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4209  */
4210 
4211 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4212 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4213     const xmlChar *in;
4214     int nbchar = 0;
4215     int line = ctxt->input->line;
4216     int col = ctxt->input->col;
4217     int ccol;
4218 
4219     SHRINK;
4220     GROW;
4221     /*
4222      * Accelerated common case where input don't need to be
4223      * modified before passing it to the handler.
4224      */
4225     if (!cdata) {
4226 	in = ctxt->input->cur;
4227 	do {
4228 get_more_space:
4229 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4230 	    if (*in == 0xA) {
4231 		do {
4232 		    ctxt->input->line++; ctxt->input->col = 1;
4233 		    in++;
4234 		} while (*in == 0xA);
4235 		goto get_more_space;
4236 	    }
4237 	    if (*in == '<') {
4238 		nbchar = in - ctxt->input->cur;
4239 		if (nbchar > 0) {
4240 		    const xmlChar *tmp = ctxt->input->cur;
4241 		    ctxt->input->cur = in;
4242 
4243 		    if ((ctxt->sax != NULL) &&
4244 		        (ctxt->sax->ignorableWhitespace !=
4245 		         ctxt->sax->characters)) {
4246 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4247 			    if (ctxt->sax->ignorableWhitespace != NULL)
4248 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4249 						       tmp, nbchar);
4250 			} else {
4251 			    if (ctxt->sax->characters != NULL)
4252 				ctxt->sax->characters(ctxt->userData,
4253 						      tmp, nbchar);
4254 			    if (*ctxt->space == -1)
4255 			        *ctxt->space = -2;
4256 			}
4257 		    } else if ((ctxt->sax != NULL) &&
4258 		               (ctxt->sax->characters != NULL)) {
4259 			ctxt->sax->characters(ctxt->userData,
4260 					      tmp, nbchar);
4261 		    }
4262 		}
4263 		return;
4264 	    }
4265 
4266 get_more:
4267             ccol = ctxt->input->col;
4268 	    while (test_char_data[*in]) {
4269 		in++;
4270 		ccol++;
4271 	    }
4272 	    ctxt->input->col = ccol;
4273 	    if (*in == 0xA) {
4274 		do {
4275 		    ctxt->input->line++; ctxt->input->col = 1;
4276 		    in++;
4277 		} while (*in == 0xA);
4278 		goto get_more;
4279 	    }
4280 	    if (*in == ']') {
4281 		if ((in[1] == ']') && (in[2] == '>')) {
4282 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4283 		    ctxt->input->cur = in;
4284 		    return;
4285 		}
4286 		in++;
4287 		ctxt->input->col++;
4288 		goto get_more;
4289 	    }
4290 	    nbchar = in - ctxt->input->cur;
4291 	    if (nbchar > 0) {
4292 		if ((ctxt->sax != NULL) &&
4293 		    (ctxt->sax->ignorableWhitespace !=
4294 		     ctxt->sax->characters) &&
4295 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4296 		    const xmlChar *tmp = ctxt->input->cur;
4297 		    ctxt->input->cur = in;
4298 
4299 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4300 		        if (ctxt->sax->ignorableWhitespace != NULL)
4301 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4302 							   tmp, nbchar);
4303 		    } else {
4304 		        if (ctxt->sax->characters != NULL)
4305 			    ctxt->sax->characters(ctxt->userData,
4306 						  tmp, nbchar);
4307 			if (*ctxt->space == -1)
4308 			    *ctxt->space = -2;
4309 		    }
4310                     line = ctxt->input->line;
4311                     col = ctxt->input->col;
4312 		} else if (ctxt->sax != NULL) {
4313 		    if (ctxt->sax->characters != NULL)
4314 			ctxt->sax->characters(ctxt->userData,
4315 					      ctxt->input->cur, nbchar);
4316                     line = ctxt->input->line;
4317                     col = ctxt->input->col;
4318 		}
4319                 /* something really bad happened in the SAX callback */
4320                 if (ctxt->instate != XML_PARSER_CONTENT)
4321                     return;
4322 	    }
4323 	    ctxt->input->cur = in;
4324 	    if (*in == 0xD) {
4325 		in++;
4326 		if (*in == 0xA) {
4327 		    ctxt->input->cur = in;
4328 		    in++;
4329 		    ctxt->input->line++; ctxt->input->col = 1;
4330 		    continue; /* while */
4331 		}
4332 		in--;
4333 	    }
4334 	    if (*in == '<') {
4335 		return;
4336 	    }
4337 	    if (*in == '&') {
4338 		return;
4339 	    }
4340 	    SHRINK;
4341 	    GROW;
4342 	    in = ctxt->input->cur;
4343 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4344 	nbchar = 0;
4345     }
4346     ctxt->input->line = line;
4347     ctxt->input->col = col;
4348     xmlParseCharDataComplex(ctxt, cdata);
4349 }
4350 
4351 /**
4352  * xmlParseCharDataComplex:
4353  * @ctxt:  an XML parser context
4354  * @cdata:  int indicating whether we are within a CDATA section
4355  *
4356  * parse a CharData section.this is the fallback function
4357  * of xmlParseCharData() when the parsing requires handling
4358  * of non-ASCII characters.
4359  */
4360 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4361 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4362     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4363     int nbchar = 0;
4364     int cur, l;
4365     int count = 0;
4366 
4367     SHRINK;
4368     GROW;
4369     cur = CUR_CHAR(l);
4370     while ((cur != '<') && /* checked */
4371            (cur != '&') &&
4372 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4373 	if ((cur == ']') && (NXT(1) == ']') &&
4374 	    (NXT(2) == '>')) {
4375 	    if (cdata) break;
4376 	    else {
4377 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4378 	    }
4379 	}
4380 	COPY_BUF(l,buf,nbchar,cur);
4381 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4382 	    buf[nbchar] = 0;
4383 
4384 	    /*
4385 	     * OK the segment is to be consumed as chars.
4386 	     */
4387 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4388 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4389 		    if (ctxt->sax->ignorableWhitespace != NULL)
4390 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4391 			                               buf, nbchar);
4392 		} else {
4393 		    if (ctxt->sax->characters != NULL)
4394 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4395 		    if ((ctxt->sax->characters !=
4396 		         ctxt->sax->ignorableWhitespace) &&
4397 			(*ctxt->space == -1))
4398 			*ctxt->space = -2;
4399 		}
4400 	    }
4401 	    nbchar = 0;
4402             /* something really bad happened in the SAX callback */
4403             if (ctxt->instate != XML_PARSER_CONTENT)
4404                 return;
4405 	}
4406 	count++;
4407 	if (count > 50) {
4408 	    GROW;
4409 	    count = 0;
4410 	}
4411 	NEXTL(l);
4412 	cur = CUR_CHAR(l);
4413     }
4414     if (nbchar != 0) {
4415         buf[nbchar] = 0;
4416 	/*
4417 	 * OK the segment is to be consumed as chars.
4418 	 */
4419 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4420 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4421 		if (ctxt->sax->ignorableWhitespace != NULL)
4422 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4423 	    } else {
4424 		if (ctxt->sax->characters != NULL)
4425 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4426 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427 		    (*ctxt->space == -1))
4428 		    *ctxt->space = -2;
4429 	    }
4430 	}
4431     }
4432     if ((cur != 0) && (!IS_CHAR(cur))) {
4433 	/* Generate the error and skip the offending character */
4434         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435                           "PCDATA invalid Char value %d\n",
4436 	                  cur);
4437 	NEXTL(l);
4438     }
4439 }
4440 
4441 /**
4442  * xmlParseExternalID:
4443  * @ctxt:  an XML parser context
4444  * @publicID:  a xmlChar** receiving PubidLiteral
4445  * @strict: indicate whether we should restrict parsing to only
4446  *          production [75], see NOTE below
4447  *
4448  * Parse an External ID or a Public ID
4449  *
4450  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4451  *       'PUBLIC' S PubidLiteral S SystemLiteral
4452  *
4453  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4455  *
4456  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4457  *
4458  * Returns the function returns SystemLiteral and in the second
4459  *                case publicID receives PubidLiteral, is strict is off
4460  *                it is possible to return NULL and have publicID set.
4461  */
4462 
4463 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4464 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465     xmlChar *URI = NULL;
4466 
4467     SHRINK;
4468 
4469     *publicID = NULL;
4470     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4471         SKIP(6);
4472 	if (!IS_BLANK_CH(CUR)) {
4473 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474 	                   "Space required after 'SYSTEM'\n");
4475 	}
4476         SKIP_BLANKS;
4477 	URI = xmlParseSystemLiteral(ctxt);
4478 	if (URI == NULL) {
4479 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4480         }
4481     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4482         SKIP(6);
4483 	if (!IS_BLANK_CH(CUR)) {
4484 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4485 		    "Space required after 'PUBLIC'\n");
4486 	}
4487         SKIP_BLANKS;
4488 	*publicID = xmlParsePubidLiteral(ctxt);
4489 	if (*publicID == NULL) {
4490 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4491 	}
4492 	if (strict) {
4493 	    /*
4494 	     * We don't handle [83] so "S SystemLiteral" is required.
4495 	     */
4496 	    if (!IS_BLANK_CH(CUR)) {
4497 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498 			"Space required after the Public Identifier\n");
4499 	    }
4500 	} else {
4501 	    /*
4502 	     * We handle [83] so we return immediately, if
4503 	     * "S SystemLiteral" is not detected. From a purely parsing
4504 	     * point of view that's a nice mess.
4505 	     */
4506 	    const xmlChar *ptr;
4507 	    GROW;
4508 
4509 	    ptr = CUR_PTR;
4510 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4511 
4512 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4513 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4514 	}
4515         SKIP_BLANKS;
4516 	URI = xmlParseSystemLiteral(ctxt);
4517 	if (URI == NULL) {
4518 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4519         }
4520     }
4521     return(URI);
4522 }
4523 
4524 /**
4525  * xmlParseCommentComplex:
4526  * @ctxt:  an XML parser context
4527  * @buf:  the already parsed part of the buffer
4528  * @len:  number of bytes filles in the buffer
4529  * @size:  allocated size of the buffer
4530  *
4531  * Skip an XML (SGML) comment <!-- .... -->
4532  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4533  *  must not occur within comments. "
4534  * This is the slow routine in case the accelerator for ascii didn't work
4535  *
4536  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4537  */
4538 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4539 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4540     int q, ql;
4541     int r, rl;
4542     int cur, l;
4543     int count = 0;
4544     int inputid;
4545 
4546     inputid = ctxt->input->id;
4547 
4548     if (buf == NULL) {
4549         len = 0;
4550 	size = XML_PARSER_BUFFER_SIZE;
4551 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4552 	if (buf == NULL) {
4553 	    xmlErrMemory(ctxt, NULL);
4554 	    return;
4555 	}
4556     }
4557     GROW;	/* Assure there's enough input data */
4558     q = CUR_CHAR(ql);
4559     if (q == 0)
4560         goto not_terminated;
4561     if (!IS_CHAR(q)) {
4562         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563                           "xmlParseComment: invalid xmlChar value %d\n",
4564 	                  q);
4565 	xmlFree (buf);
4566 	return;
4567     }
4568     NEXTL(ql);
4569     r = CUR_CHAR(rl);
4570     if (r == 0)
4571         goto not_terminated;
4572     if (!IS_CHAR(r)) {
4573         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574                           "xmlParseComment: invalid xmlChar value %d\n",
4575 	                  q);
4576 	xmlFree (buf);
4577 	return;
4578     }
4579     NEXTL(rl);
4580     cur = CUR_CHAR(l);
4581     if (cur == 0)
4582         goto not_terminated;
4583     while (IS_CHAR(cur) && /* checked */
4584            ((cur != '>') ||
4585 	    (r != '-') || (q != '-'))) {
4586 	if ((r == '-') && (q == '-')) {
4587 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4588 	}
4589 	if (len + 5 >= size) {
4590 	    xmlChar *new_buf;
4591 	    size *= 2;
4592 	    new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593 	    if (new_buf == NULL) {
4594 		xmlFree (buf);
4595 		xmlErrMemory(ctxt, NULL);
4596 		return;
4597 	    }
4598 	    buf = new_buf;
4599 	}
4600 	COPY_BUF(ql,buf,len,q);
4601 	q = r;
4602 	ql = rl;
4603 	r = cur;
4604 	rl = l;
4605 
4606 	count++;
4607 	if (count > 50) {
4608 	    GROW;
4609 	    count = 0;
4610 	}
4611 	NEXTL(l);
4612 	cur = CUR_CHAR(l);
4613 	if (cur == 0) {
4614 	    SHRINK;
4615 	    GROW;
4616 	    cur = CUR_CHAR(l);
4617 	}
4618     }
4619     buf[len] = 0;
4620     if (cur == 0) {
4621 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4622 	                     "Comment not terminated \n<!--%.50s\n", buf);
4623     } else if (!IS_CHAR(cur)) {
4624         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625                           "xmlParseComment: invalid xmlChar value %d\n",
4626 	                  cur);
4627     } else {
4628 	if (inputid != ctxt->input->id) {
4629 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630 		"Comment doesn't start and stop in the same entity\n");
4631 	}
4632         NEXT;
4633 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634 	    (!ctxt->disableSAX))
4635 	    ctxt->sax->comment(ctxt->userData, buf);
4636     }
4637     xmlFree(buf);
4638     return;
4639 not_terminated:
4640     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641 			 "Comment not terminated\n", NULL);
4642     xmlFree(buf);
4643     return;
4644 }
4645 
4646 /**
4647  * xmlParseComment:
4648  * @ctxt:  an XML parser context
4649  *
4650  * Skip an XML (SGML) comment <!-- .... -->
4651  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4652  *  must not occur within comments. "
4653  *
4654  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4655  */
4656 void
xmlParseComment(xmlParserCtxtPtr ctxt)4657 xmlParseComment(xmlParserCtxtPtr ctxt) {
4658     xmlChar *buf = NULL;
4659     int size = XML_PARSER_BUFFER_SIZE;
4660     int len = 0;
4661     xmlParserInputState state;
4662     const xmlChar *in;
4663     int nbchar = 0, ccol;
4664     int inputid;
4665 
4666     /*
4667      * Check that there is a comment right here.
4668      */
4669     if ((RAW != '<') || (NXT(1) != '!') ||
4670         (NXT(2) != '-') || (NXT(3) != '-')) return;
4671     state = ctxt->instate;
4672     ctxt->instate = XML_PARSER_COMMENT;
4673     inputid = ctxt->input->id;
4674     SKIP(4);
4675     SHRINK;
4676     GROW;
4677 
4678     /*
4679      * Accelerated common case where input don't need to be
4680      * modified before passing it to the handler.
4681      */
4682     in = ctxt->input->cur;
4683     do {
4684 	if (*in == 0xA) {
4685 	    do {
4686 		ctxt->input->line++; ctxt->input->col = 1;
4687 		in++;
4688 	    } while (*in == 0xA);
4689 	}
4690 get_more:
4691         ccol = ctxt->input->col;
4692 	while (((*in > '-') && (*in <= 0x7F)) ||
4693 	       ((*in >= 0x20) && (*in < '-')) ||
4694 	       (*in == 0x09)) {
4695 		    in++;
4696 		    ccol++;
4697 	}
4698 	ctxt->input->col = ccol;
4699 	if (*in == 0xA) {
4700 	    do {
4701 		ctxt->input->line++; ctxt->input->col = 1;
4702 		in++;
4703 	    } while (*in == 0xA);
4704 	    goto get_more;
4705 	}
4706 	nbchar = in - ctxt->input->cur;
4707 	/*
4708 	 * save current set of data
4709 	 */
4710 	if (nbchar > 0) {
4711 	    if ((ctxt->sax != NULL) &&
4712 		(ctxt->sax->comment != NULL)) {
4713 		if (buf == NULL) {
4714 		    if ((*in == '-') && (in[1] == '-'))
4715 		        size = nbchar + 1;
4716 		    else
4717 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4718 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4719 		    if (buf == NULL) {
4720 		        xmlErrMemory(ctxt, NULL);
4721 			ctxt->instate = state;
4722 			return;
4723 		    }
4724 		    len = 0;
4725 		} else if (len + nbchar + 1 >= size) {
4726 		    xmlChar *new_buf;
4727 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728 		    new_buf = (xmlChar *) xmlRealloc(buf,
4729 		                                     size * sizeof(xmlChar));
4730 		    if (new_buf == NULL) {
4731 		        xmlFree (buf);
4732 			xmlErrMemory(ctxt, NULL);
4733 			ctxt->instate = state;
4734 			return;
4735 		    }
4736 		    buf = new_buf;
4737 		}
4738 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4739 		len += nbchar;
4740 		buf[len] = 0;
4741 	    }
4742 	}
4743 	ctxt->input->cur = in;
4744 	if (*in == 0xA) {
4745 	    in++;
4746 	    ctxt->input->line++; ctxt->input->col = 1;
4747 	}
4748 	if (*in == 0xD) {
4749 	    in++;
4750 	    if (*in == 0xA) {
4751 		ctxt->input->cur = in;
4752 		in++;
4753 		ctxt->input->line++; ctxt->input->col = 1;
4754 		continue; /* while */
4755 	    }
4756 	    in--;
4757 	}
4758 	SHRINK;
4759 	GROW;
4760 	in = ctxt->input->cur;
4761 	if (*in == '-') {
4762 	    if (in[1] == '-') {
4763 	        if (in[2] == '>') {
4764 		    if (ctxt->input->id != inputid) {
4765 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766 			"comment doesn't start and stop in the same entity\n");
4767 		    }
4768 		    SKIP(3);
4769 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770 		        (!ctxt->disableSAX)) {
4771 			if (buf != NULL)
4772 			    ctxt->sax->comment(ctxt->userData, buf);
4773 			else
4774 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4775 		    }
4776 		    if (buf != NULL)
4777 		        xmlFree(buf);
4778 		    ctxt->instate = state;
4779 		    return;
4780 		}
4781 		if (buf != NULL)
4782 		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4783 		                      "Comment not terminated \n<!--%.50s\n",
4784 				      buf);
4785 		else
4786 		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4787 		                      "Comment not terminated \n", NULL);
4788 		in++;
4789 		ctxt->input->col++;
4790 	    }
4791 	    in++;
4792 	    ctxt->input->col++;
4793 	    goto get_more;
4794 	}
4795     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4796     xmlParseCommentComplex(ctxt, buf, len, size);
4797     ctxt->instate = state;
4798     return;
4799 }
4800 
4801 
4802 /**
4803  * xmlParsePITarget:
4804  * @ctxt:  an XML parser context
4805  *
4806  * parse the name of a PI
4807  *
4808  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4809  *
4810  * Returns the PITarget name or NULL
4811  */
4812 
4813 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4814 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4815     const xmlChar *name;
4816 
4817     name = xmlParseName(ctxt);
4818     if ((name != NULL) &&
4819         ((name[0] == 'x') || (name[0] == 'X')) &&
4820         ((name[1] == 'm') || (name[1] == 'M')) &&
4821         ((name[2] == 'l') || (name[2] == 'L'))) {
4822 	int i;
4823 	if ((name[0] == 'x') && (name[1] == 'm') &&
4824 	    (name[2] == 'l') && (name[3] == 0)) {
4825 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4826 		 "XML declaration allowed only at the start of the document\n");
4827 	    return(name);
4828 	} else if (name[3] == 0) {
4829 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4830 	    return(name);
4831 	}
4832 	for (i = 0;;i++) {
4833 	    if (xmlW3CPIs[i] == NULL) break;
4834 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4835 	        return(name);
4836 	}
4837 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4838 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
4839 		      NULL, NULL);
4840     }
4841     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4842 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
4843 		 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4844     }
4845     return(name);
4846 }
4847 
4848 #ifdef LIBXML_CATALOG_ENABLED
4849 /**
4850  * xmlParseCatalogPI:
4851  * @ctxt:  an XML parser context
4852  * @catalog:  the PI value string
4853  *
4854  * parse an XML Catalog Processing Instruction.
4855  *
4856  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4857  *
4858  * Occurs only if allowed by the user and if happening in the Misc
4859  * part of the document before any doctype informations
4860  * This will add the given catalog to the parsing context in order
4861  * to be used if there is a resolution need further down in the document
4862  */
4863 
4864 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)4865 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4866     xmlChar *URL = NULL;
4867     const xmlChar *tmp, *base;
4868     xmlChar marker;
4869 
4870     tmp = catalog;
4871     while (IS_BLANK_CH(*tmp)) tmp++;
4872     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4873 	goto error;
4874     tmp += 7;
4875     while (IS_BLANK_CH(*tmp)) tmp++;
4876     if (*tmp != '=') {
4877 	return;
4878     }
4879     tmp++;
4880     while (IS_BLANK_CH(*tmp)) tmp++;
4881     marker = *tmp;
4882     if ((marker != '\'') && (marker != '"'))
4883 	goto error;
4884     tmp++;
4885     base = tmp;
4886     while ((*tmp != 0) && (*tmp != marker)) tmp++;
4887     if (*tmp == 0)
4888 	goto error;
4889     URL = xmlStrndup(base, tmp - base);
4890     tmp++;
4891     while (IS_BLANK_CH(*tmp)) tmp++;
4892     if (*tmp != 0)
4893 	goto error;
4894 
4895     if (URL != NULL) {
4896 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4897 	xmlFree(URL);
4898     }
4899     return;
4900 
4901 error:
4902     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4903 	          "Catalog PI syntax error: %s\n",
4904 		  catalog, NULL);
4905     if (URL != NULL)
4906 	xmlFree(URL);
4907 }
4908 #endif
4909 
4910 /**
4911  * xmlParsePI:
4912  * @ctxt:  an XML parser context
4913  *
4914  * parse an XML Processing Instruction.
4915  *
4916  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4917  *
4918  * The processing is transfered to SAX once parsed.
4919  */
4920 
4921 void
xmlParsePI(xmlParserCtxtPtr ctxt)4922 xmlParsePI(xmlParserCtxtPtr ctxt) {
4923     xmlChar *buf = NULL;
4924     int len = 0;
4925     int size = XML_PARSER_BUFFER_SIZE;
4926     int cur, l;
4927     const xmlChar *target;
4928     xmlParserInputState state;
4929     int count = 0;
4930 
4931     if ((RAW == '<') && (NXT(1) == '?')) {
4932 	xmlParserInputPtr input = ctxt->input;
4933 	state = ctxt->instate;
4934         ctxt->instate = XML_PARSER_PI;
4935 	/*
4936 	 * this is a Processing Instruction.
4937 	 */
4938 	SKIP(2);
4939 	SHRINK;
4940 
4941 	/*
4942 	 * Parse the target name and check for special support like
4943 	 * namespace.
4944 	 */
4945         target = xmlParsePITarget(ctxt);
4946 	if (target != NULL) {
4947 	    if ((RAW == '?') && (NXT(1) == '>')) {
4948 		if (input != ctxt->input) {
4949 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4950 	    "PI declaration doesn't start and stop in the same entity\n");
4951 		}
4952 		SKIP(2);
4953 
4954 		/*
4955 		 * SAX: PI detected.
4956 		 */
4957 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4958 		    (ctxt->sax->processingInstruction != NULL))
4959 		    ctxt->sax->processingInstruction(ctxt->userData,
4960 		                                     target, NULL);
4961 		if (ctxt->instate != XML_PARSER_EOF)
4962 		    ctxt->instate = state;
4963 		return;
4964 	    }
4965 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4966 	    if (buf == NULL) {
4967 		xmlErrMemory(ctxt, NULL);
4968 		ctxt->instate = state;
4969 		return;
4970 	    }
4971 	    cur = CUR;
4972 	    if (!IS_BLANK(cur)) {
4973 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4974 			  "ParsePI: PI %s space expected\n", target);
4975 	    }
4976             SKIP_BLANKS;
4977 	    cur = CUR_CHAR(l);
4978 	    while (IS_CHAR(cur) && /* checked */
4979 		   ((cur != '?') || (NXT(1) != '>'))) {
4980 		if (len + 5 >= size) {
4981 		    xmlChar *tmp;
4982 
4983 		    size *= 2;
4984 		    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4985 		    if (tmp == NULL) {
4986 			xmlErrMemory(ctxt, NULL);
4987 			xmlFree(buf);
4988 			ctxt->instate = state;
4989 			return;
4990 		    }
4991 		    buf = tmp;
4992 		}
4993 		count++;
4994 		if (count > 50) {
4995 		    GROW;
4996 		    count = 0;
4997 		}
4998 		COPY_BUF(l,buf,len,cur);
4999 		NEXTL(l);
5000 		cur = CUR_CHAR(l);
5001 		if (cur == 0) {
5002 		    SHRINK;
5003 		    GROW;
5004 		    cur = CUR_CHAR(l);
5005 		}
5006 	    }
5007 	    buf[len] = 0;
5008 	    if (cur != '?') {
5009 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5010 		      "ParsePI: PI %s never end ...\n", target);
5011 	    } else {
5012 		if (input != ctxt->input) {
5013 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5014 	    "PI declaration doesn't start and stop in the same entity\n");
5015 		}
5016 		SKIP(2);
5017 
5018 #ifdef LIBXML_CATALOG_ENABLED
5019 		if (((state == XML_PARSER_MISC) ||
5020 	             (state == XML_PARSER_START)) &&
5021 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5022 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5023 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5024 			(allow == XML_CATA_ALLOW_ALL))
5025 			xmlParseCatalogPI(ctxt, buf);
5026 		}
5027 #endif
5028 
5029 
5030 		/*
5031 		 * SAX: PI detected.
5032 		 */
5033 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5034 		    (ctxt->sax->processingInstruction != NULL))
5035 		    ctxt->sax->processingInstruction(ctxt->userData,
5036 		                                     target, buf);
5037 	    }
5038 	    xmlFree(buf);
5039 	} else {
5040 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5041 	}
5042 	if (ctxt->instate != XML_PARSER_EOF)
5043 	    ctxt->instate = state;
5044     }
5045 }
5046 
5047 /**
5048  * xmlParseNotationDecl:
5049  * @ctxt:  an XML parser context
5050  *
5051  * parse a notation declaration
5052  *
5053  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5054  *
5055  * Hence there is actually 3 choices:
5056  *     'PUBLIC' S PubidLiteral
5057  *     'PUBLIC' S PubidLiteral S SystemLiteral
5058  * and 'SYSTEM' S SystemLiteral
5059  *
5060  * See the NOTE on xmlParseExternalID().
5061  */
5062 
5063 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5064 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5065     const xmlChar *name;
5066     xmlChar *Pubid;
5067     xmlChar *Systemid;
5068 
5069     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5070 	xmlParserInputPtr input = ctxt->input;
5071 	SHRINK;
5072 	SKIP(10);
5073 	if (!IS_BLANK_CH(CUR)) {
5074 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5075 			   "Space required after '<!NOTATION'\n");
5076 	    return;
5077 	}
5078 	SKIP_BLANKS;
5079 
5080         name = xmlParseName(ctxt);
5081 	if (name == NULL) {
5082 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5083 	    return;
5084 	}
5085 	if (!IS_BLANK_CH(CUR)) {
5086 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5087 		     "Space required after the NOTATION name'\n");
5088 	    return;
5089 	}
5090 	if (xmlStrchr(name, ':') != NULL) {
5091 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5092 		     "colon are forbidden from notation names '%s'\n",
5093 		     name, NULL, NULL);
5094 	}
5095 	SKIP_BLANKS;
5096 
5097 	/*
5098 	 * Parse the IDs.
5099 	 */
5100 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5101 	SKIP_BLANKS;
5102 
5103 	if (RAW == '>') {
5104 	    if (input != ctxt->input) {
5105 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5106 	"Notation declaration doesn't start and stop in the same entity\n");
5107 	    }
5108 	    NEXT;
5109 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5110 		(ctxt->sax->notationDecl != NULL))
5111 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5112 	} else {
5113 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5114 	}
5115 	if (Systemid != NULL) xmlFree(Systemid);
5116 	if (Pubid != NULL) xmlFree(Pubid);
5117     }
5118 }
5119 
5120 /**
5121  * xmlParseEntityDecl:
5122  * @ctxt:  an XML parser context
5123  *
5124  * parse <!ENTITY declarations
5125  *
5126  * [70] EntityDecl ::= GEDecl | PEDecl
5127  *
5128  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5129  *
5130  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5131  *
5132  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5133  *
5134  * [74] PEDef ::= EntityValue | ExternalID
5135  *
5136  * [76] NDataDecl ::= S 'NDATA' S Name
5137  *
5138  * [ VC: Notation Declared ]
5139  * The Name must match the declared name of a notation.
5140  */
5141 
5142 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5143 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5144     const xmlChar *name = NULL;
5145     xmlChar *value = NULL;
5146     xmlChar *URI = NULL, *literal = NULL;
5147     const xmlChar *ndata = NULL;
5148     int isParameter = 0;
5149     xmlChar *orig = NULL;
5150     int skipped;
5151 
5152     /* GROW; done in the caller */
5153     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5154 	xmlParserInputPtr input = ctxt->input;
5155 	SHRINK;
5156 	SKIP(8);
5157 	skipped = SKIP_BLANKS;
5158 	if (skipped == 0) {
5159 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5160 			   "Space required after '<!ENTITY'\n");
5161 	}
5162 
5163 	if (RAW == '%') {
5164 	    NEXT;
5165 	    skipped = SKIP_BLANKS;
5166 	    if (skipped == 0) {
5167 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5168 			       "Space required after '%'\n");
5169 	    }
5170 	    isParameter = 1;
5171 	}
5172 
5173         name = xmlParseName(ctxt);
5174 	if (name == NULL) {
5175 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5176 	                   "xmlParseEntityDecl: no name\n");
5177             return;
5178 	}
5179 	if (xmlStrchr(name, ':') != NULL) {
5180 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5181 		     "colon are forbidden from entities names '%s'\n",
5182 		     name, NULL, NULL);
5183 	}
5184         skipped = SKIP_BLANKS;
5185 	if (skipped == 0) {
5186 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5187 			   "Space required after the entity name\n");
5188 	}
5189 
5190 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5191 	/*
5192 	 * handle the various case of definitions...
5193 	 */
5194 	if (isParameter) {
5195 	    if ((RAW == '"') || (RAW == '\'')) {
5196 	        value = xmlParseEntityValue(ctxt, &orig);
5197 		if (value) {
5198 		    if ((ctxt->sax != NULL) &&
5199 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5200 			ctxt->sax->entityDecl(ctxt->userData, name,
5201 		                    XML_INTERNAL_PARAMETER_ENTITY,
5202 				    NULL, NULL, value);
5203 		}
5204 	    } else {
5205 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5206 		if ((URI == NULL) && (literal == NULL)) {
5207 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5208 		}
5209 		if (URI) {
5210 		    xmlURIPtr uri;
5211 
5212 		    uri = xmlParseURI((const char *) URI);
5213 		    if (uri == NULL) {
5214 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5215 				     "Invalid URI: %s\n", URI);
5216 			/*
5217 			 * This really ought to be a well formedness error
5218 			 * but the XML Core WG decided otherwise c.f. issue
5219 			 * E26 of the XML erratas.
5220 			 */
5221 		    } else {
5222 			if (uri->fragment != NULL) {
5223 			    /*
5224 			     * Okay this is foolish to block those but not
5225 			     * invalid URIs.
5226 			     */
5227 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5228 			} else {
5229 			    if ((ctxt->sax != NULL) &&
5230 				(!ctxt->disableSAX) &&
5231 				(ctxt->sax->entityDecl != NULL))
5232 				ctxt->sax->entityDecl(ctxt->userData, name,
5233 					    XML_EXTERNAL_PARAMETER_ENTITY,
5234 					    literal, URI, NULL);
5235 			}
5236 			xmlFreeURI(uri);
5237 		    }
5238 		}
5239 	    }
5240 	} else {
5241 	    if ((RAW == '"') || (RAW == '\'')) {
5242 	        value = xmlParseEntityValue(ctxt, &orig);
5243 		if ((ctxt->sax != NULL) &&
5244 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5245 		    ctxt->sax->entityDecl(ctxt->userData, name,
5246 				XML_INTERNAL_GENERAL_ENTITY,
5247 				NULL, NULL, value);
5248 		/*
5249 		 * For expat compatibility in SAX mode.
5250 		 */
5251 		if ((ctxt->myDoc == NULL) ||
5252 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5253 		    if (ctxt->myDoc == NULL) {
5254 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5255 			if (ctxt->myDoc == NULL) {
5256 			    xmlErrMemory(ctxt, "New Doc failed");
5257 			    return;
5258 			}
5259 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5260 		    }
5261 		    if (ctxt->myDoc->intSubset == NULL)
5262 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5263 					    BAD_CAST "fake", NULL, NULL);
5264 
5265 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5266 			              NULL, NULL, value);
5267 		}
5268 	    } else {
5269 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5270 		if ((URI == NULL) && (literal == NULL)) {
5271 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5272 		}
5273 		if (URI) {
5274 		    xmlURIPtr uri;
5275 
5276 		    uri = xmlParseURI((const char *)URI);
5277 		    if (uri == NULL) {
5278 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5279 				     "Invalid URI: %s\n", URI);
5280 			/*
5281 			 * This really ought to be a well formedness error
5282 			 * but the XML Core WG decided otherwise c.f. issue
5283 			 * E26 of the XML erratas.
5284 			 */
5285 		    } else {
5286 			if (uri->fragment != NULL) {
5287 			    /*
5288 			     * Okay this is foolish to block those but not
5289 			     * invalid URIs.
5290 			     */
5291 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5292 			}
5293 			xmlFreeURI(uri);
5294 		    }
5295 		}
5296 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5297 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 				   "Space required before 'NDATA'\n");
5299 		}
5300 		SKIP_BLANKS;
5301 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5302 		    SKIP(5);
5303 		    if (!IS_BLANK_CH(CUR)) {
5304 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5305 				       "Space required after 'NDATA'\n");
5306 		    }
5307 		    SKIP_BLANKS;
5308 		    ndata = xmlParseName(ctxt);
5309 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5310 		        (ctxt->sax->unparsedEntityDecl != NULL))
5311 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5312 				    literal, URI, ndata);
5313 		} else {
5314 		    if ((ctxt->sax != NULL) &&
5315 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5316 			ctxt->sax->entityDecl(ctxt->userData, name,
5317 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5318 				    literal, URI, NULL);
5319 		    /*
5320 		     * For expat compatibility in SAX mode.
5321 		     * assuming the entity repalcement was asked for
5322 		     */
5323 		    if ((ctxt->replaceEntities != 0) &&
5324 			((ctxt->myDoc == NULL) ||
5325 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5326 			if (ctxt->myDoc == NULL) {
5327 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5328 			    if (ctxt->myDoc == NULL) {
5329 			        xmlErrMemory(ctxt, "New Doc failed");
5330 				return;
5331 			    }
5332 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5333 			}
5334 
5335 			if (ctxt->myDoc->intSubset == NULL)
5336 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5337 						BAD_CAST "fake", NULL, NULL);
5338 			xmlSAX2EntityDecl(ctxt, name,
5339 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5340 				          literal, URI, NULL);
5341 		    }
5342 		}
5343 	    }
5344 	}
5345 	SKIP_BLANKS;
5346 	if (RAW != '>') {
5347 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5348 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5349 	} else {
5350 	    if (input != ctxt->input) {
5351 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5352 	"Entity declaration doesn't start and stop in the same entity\n");
5353 	    }
5354 	    NEXT;
5355 	}
5356 	if (orig != NULL) {
5357 	    /*
5358 	     * Ugly mechanism to save the raw entity value.
5359 	     */
5360 	    xmlEntityPtr cur = NULL;
5361 
5362 	    if (isParameter) {
5363 	        if ((ctxt->sax != NULL) &&
5364 		    (ctxt->sax->getParameterEntity != NULL))
5365 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5366 	    } else {
5367 	        if ((ctxt->sax != NULL) &&
5368 		    (ctxt->sax->getEntity != NULL))
5369 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5370 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5371 		    cur = xmlSAX2GetEntity(ctxt, name);
5372 		}
5373 	    }
5374             if (cur != NULL) {
5375 	        if (cur->orig != NULL)
5376 		    xmlFree(orig);
5377 		else
5378 		    cur->orig = orig;
5379 	    } else
5380 		xmlFree(orig);
5381 	}
5382 	if (value != NULL) xmlFree(value);
5383 	if (URI != NULL) xmlFree(URI);
5384 	if (literal != NULL) xmlFree(literal);
5385     }
5386 }
5387 
5388 /**
5389  * xmlParseDefaultDecl:
5390  * @ctxt:  an XML parser context
5391  * @value:  Receive a possible fixed default value for the attribute
5392  *
5393  * Parse an attribute default declaration
5394  *
5395  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5396  *
5397  * [ VC: Required Attribute ]
5398  * if the default declaration is the keyword #REQUIRED, then the
5399  * attribute must be specified for all elements of the type in the
5400  * attribute-list declaration.
5401  *
5402  * [ VC: Attribute Default Legal ]
5403  * The declared default value must meet the lexical constraints of
5404  * the declared attribute type c.f. xmlValidateAttributeDecl()
5405  *
5406  * [ VC: Fixed Attribute Default ]
5407  * if an attribute has a default value declared with the #FIXED
5408  * keyword, instances of that attribute must match the default value.
5409  *
5410  * [ WFC: No < in Attribute Values ]
5411  * handled in xmlParseAttValue()
5412  *
5413  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5414  *          or XML_ATTRIBUTE_FIXED.
5415  */
5416 
5417 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5418 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5419     int val;
5420     xmlChar *ret;
5421 
5422     *value = NULL;
5423     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5424 	SKIP(9);
5425 	return(XML_ATTRIBUTE_REQUIRED);
5426     }
5427     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5428 	SKIP(8);
5429 	return(XML_ATTRIBUTE_IMPLIED);
5430     }
5431     val = XML_ATTRIBUTE_NONE;
5432     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5433 	SKIP(6);
5434 	val = XML_ATTRIBUTE_FIXED;
5435 	if (!IS_BLANK_CH(CUR)) {
5436 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 			   "Space required after '#FIXED'\n");
5438 	}
5439 	SKIP_BLANKS;
5440     }
5441     ret = xmlParseAttValue(ctxt);
5442     ctxt->instate = XML_PARSER_DTD;
5443     if (ret == NULL) {
5444 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5445 		       "Attribute default value declaration error\n");
5446     } else
5447         *value = ret;
5448     return(val);
5449 }
5450 
5451 /**
5452  * xmlParseNotationType:
5453  * @ctxt:  an XML parser context
5454  *
5455  * parse an Notation attribute type.
5456  *
5457  * Note: the leading 'NOTATION' S part has already being parsed...
5458  *
5459  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5460  *
5461  * [ VC: Notation Attributes ]
5462  * Values of this type must match one of the notation names included
5463  * in the declaration; all notation names in the declaration must be declared.
5464  *
5465  * Returns: the notation attribute tree built while parsing
5466  */
5467 
5468 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5469 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5470     const xmlChar *name;
5471     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5472 
5473     if (RAW != '(') {
5474 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5475 	return(NULL);
5476     }
5477     SHRINK;
5478     do {
5479         NEXT;
5480 	SKIP_BLANKS;
5481         name = xmlParseName(ctxt);
5482 	if (name == NULL) {
5483 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5484 			   "Name expected in NOTATION declaration\n");
5485             xmlFreeEnumeration(ret);
5486 	    return(NULL);
5487 	}
5488 	tmp = ret;
5489 	while (tmp != NULL) {
5490 	    if (xmlStrEqual(name, tmp->name)) {
5491 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5492 	  "standalone: attribute notation value token %s duplicated\n",
5493 				 name, NULL);
5494 		if (!xmlDictOwns(ctxt->dict, name))
5495 		    xmlFree((xmlChar *) name);
5496 		break;
5497 	    }
5498 	    tmp = tmp->next;
5499 	}
5500 	if (tmp == NULL) {
5501 	    cur = xmlCreateEnumeration(name);
5502 	    if (cur == NULL) {
5503                 xmlFreeEnumeration(ret);
5504                 return(NULL);
5505             }
5506 	    if (last == NULL) ret = last = cur;
5507 	    else {
5508 		last->next = cur;
5509 		last = cur;
5510 	    }
5511 	}
5512 	SKIP_BLANKS;
5513     } while (RAW == '|');
5514     if (RAW != ')') {
5515 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5516         xmlFreeEnumeration(ret);
5517 	return(NULL);
5518     }
5519     NEXT;
5520     return(ret);
5521 }
5522 
5523 /**
5524  * xmlParseEnumerationType:
5525  * @ctxt:  an XML parser context
5526  *
5527  * parse an Enumeration attribute type.
5528  *
5529  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5530  *
5531  * [ VC: Enumeration ]
5532  * Values of this type must match one of the Nmtoken tokens in
5533  * the declaration
5534  *
5535  * Returns: the enumeration attribute tree built while parsing
5536  */
5537 
5538 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5539 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5540     xmlChar *name;
5541     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5542 
5543     if (RAW != '(') {
5544 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5545 	return(NULL);
5546     }
5547     SHRINK;
5548     do {
5549         NEXT;
5550 	SKIP_BLANKS;
5551         name = xmlParseNmtoken(ctxt);
5552 	if (name == NULL) {
5553 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5554 	    return(ret);
5555 	}
5556 	tmp = ret;
5557 	while (tmp != NULL) {
5558 	    if (xmlStrEqual(name, tmp->name)) {
5559 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5560 	  "standalone: attribute enumeration value token %s duplicated\n",
5561 				 name, NULL);
5562 		if (!xmlDictOwns(ctxt->dict, name))
5563 		    xmlFree(name);
5564 		break;
5565 	    }
5566 	    tmp = tmp->next;
5567 	}
5568 	if (tmp == NULL) {
5569 	    cur = xmlCreateEnumeration(name);
5570 	    if (!xmlDictOwns(ctxt->dict, name))
5571 		xmlFree(name);
5572 	    if (cur == NULL) {
5573                 xmlFreeEnumeration(ret);
5574                 return(NULL);
5575             }
5576 	    if (last == NULL) ret = last = cur;
5577 	    else {
5578 		last->next = cur;
5579 		last = cur;
5580 	    }
5581 	}
5582 	SKIP_BLANKS;
5583     } while (RAW == '|');
5584     if (RAW != ')') {
5585 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5586 	return(ret);
5587     }
5588     NEXT;
5589     return(ret);
5590 }
5591 
5592 /**
5593  * xmlParseEnumeratedType:
5594  * @ctxt:  an XML parser context
5595  * @tree:  the enumeration tree built while parsing
5596  *
5597  * parse an Enumerated attribute type.
5598  *
5599  * [57] EnumeratedType ::= NotationType | Enumeration
5600  *
5601  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5602  *
5603  *
5604  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5605  */
5606 
5607 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5608 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5609     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5610 	SKIP(8);
5611 	if (!IS_BLANK_CH(CUR)) {
5612 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5613 			   "Space required after 'NOTATION'\n");
5614 	    return(0);
5615 	}
5616         SKIP_BLANKS;
5617 	*tree = xmlParseNotationType(ctxt);
5618 	if (*tree == NULL) return(0);
5619 	return(XML_ATTRIBUTE_NOTATION);
5620     }
5621     *tree = xmlParseEnumerationType(ctxt);
5622     if (*tree == NULL) return(0);
5623     return(XML_ATTRIBUTE_ENUMERATION);
5624 }
5625 
5626 /**
5627  * xmlParseAttributeType:
5628  * @ctxt:  an XML parser context
5629  * @tree:  the enumeration tree built while parsing
5630  *
5631  * parse the Attribute list def for an element
5632  *
5633  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5634  *
5635  * [55] StringType ::= 'CDATA'
5636  *
5637  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5638  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5639  *
5640  * Validity constraints for attribute values syntax are checked in
5641  * xmlValidateAttributeValue()
5642  *
5643  * [ VC: ID ]
5644  * Values of type ID must match the Name production. A name must not
5645  * appear more than once in an XML document as a value of this type;
5646  * i.e., ID values must uniquely identify the elements which bear them.
5647  *
5648  * [ VC: One ID per Element Type ]
5649  * No element type may have more than one ID attribute specified.
5650  *
5651  * [ VC: ID Attribute Default ]
5652  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5653  *
5654  * [ VC: IDREF ]
5655  * Values of type IDREF must match the Name production, and values
5656  * of type IDREFS must match Names; each IDREF Name must match the value
5657  * of an ID attribute on some element in the XML document; i.e. IDREF
5658  * values must match the value of some ID attribute.
5659  *
5660  * [ VC: Entity Name ]
5661  * Values of type ENTITY must match the Name production, values
5662  * of type ENTITIES must match Names; each Entity Name must match the
5663  * name of an unparsed entity declared in the DTD.
5664  *
5665  * [ VC: Name Token ]
5666  * Values of type NMTOKEN must match the Nmtoken production; values
5667  * of type NMTOKENS must match Nmtokens.
5668  *
5669  * Returns the attribute type
5670  */
5671 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5672 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5673     SHRINK;
5674     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5675 	SKIP(5);
5676 	return(XML_ATTRIBUTE_CDATA);
5677      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5678 	SKIP(6);
5679 	return(XML_ATTRIBUTE_IDREFS);
5680      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5681 	SKIP(5);
5682 	return(XML_ATTRIBUTE_IDREF);
5683      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5684         SKIP(2);
5685 	return(XML_ATTRIBUTE_ID);
5686      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5687 	SKIP(6);
5688 	return(XML_ATTRIBUTE_ENTITY);
5689      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5690 	SKIP(8);
5691 	return(XML_ATTRIBUTE_ENTITIES);
5692      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5693 	SKIP(8);
5694 	return(XML_ATTRIBUTE_NMTOKENS);
5695      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5696 	SKIP(7);
5697 	return(XML_ATTRIBUTE_NMTOKEN);
5698      }
5699      return(xmlParseEnumeratedType(ctxt, tree));
5700 }
5701 
5702 /**
5703  * xmlParseAttributeListDecl:
5704  * @ctxt:  an XML parser context
5705  *
5706  * : parse the Attribute list def for an element
5707  *
5708  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5709  *
5710  * [53] AttDef ::= S Name S AttType S DefaultDecl
5711  *
5712  */
5713 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5714 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5715     const xmlChar *elemName;
5716     const xmlChar *attrName;
5717     xmlEnumerationPtr tree;
5718 
5719     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5720 	xmlParserInputPtr input = ctxt->input;
5721 
5722 	SKIP(9);
5723 	if (!IS_BLANK_CH(CUR)) {
5724 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5725 		                 "Space required after '<!ATTLIST'\n");
5726 	}
5727         SKIP_BLANKS;
5728         elemName = xmlParseName(ctxt);
5729 	if (elemName == NULL) {
5730 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5731 			   "ATTLIST: no name for Element\n");
5732 	    return;
5733 	}
5734 	SKIP_BLANKS;
5735 	GROW;
5736 	while (RAW != '>') {
5737 	    const xmlChar *check = CUR_PTR;
5738 	    int type;
5739 	    int def;
5740 	    xmlChar *defaultValue = NULL;
5741 
5742 	    GROW;
5743             tree = NULL;
5744 	    attrName = xmlParseName(ctxt);
5745 	    if (attrName == NULL) {
5746 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 			       "ATTLIST: no name for Attribute\n");
5748 		break;
5749 	    }
5750 	    GROW;
5751 	    if (!IS_BLANK_CH(CUR)) {
5752 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5753 		        "Space required after the attribute name\n");
5754 		break;
5755 	    }
5756 	    SKIP_BLANKS;
5757 
5758 	    type = xmlParseAttributeType(ctxt, &tree);
5759 	    if (type <= 0) {
5760 	        break;
5761 	    }
5762 
5763 	    GROW;
5764 	    if (!IS_BLANK_CH(CUR)) {
5765 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5766 			       "Space required after the attribute type\n");
5767 	        if (tree != NULL)
5768 		    xmlFreeEnumeration(tree);
5769 		break;
5770 	    }
5771 	    SKIP_BLANKS;
5772 
5773 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5774 	    if (def <= 0) {
5775                 if (defaultValue != NULL)
5776 		    xmlFree(defaultValue);
5777 	        if (tree != NULL)
5778 		    xmlFreeEnumeration(tree);
5779 	        break;
5780 	    }
5781 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5782 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5783 
5784 	    GROW;
5785             if (RAW != '>') {
5786 		if (!IS_BLANK_CH(CUR)) {
5787 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5788 			"Space required after the attribute default value\n");
5789 		    if (defaultValue != NULL)
5790 			xmlFree(defaultValue);
5791 		    if (tree != NULL)
5792 			xmlFreeEnumeration(tree);
5793 		    break;
5794 		}
5795 		SKIP_BLANKS;
5796 	    }
5797 	    if (check == CUR_PTR) {
5798 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5799 		            "in xmlParseAttributeListDecl\n");
5800 		if (defaultValue != NULL)
5801 		    xmlFree(defaultValue);
5802 	        if (tree != NULL)
5803 		    xmlFreeEnumeration(tree);
5804 		break;
5805 	    }
5806 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5807 		(ctxt->sax->attributeDecl != NULL))
5808 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5809 	                        type, def, defaultValue, tree);
5810 	    else if (tree != NULL)
5811 		xmlFreeEnumeration(tree);
5812 
5813 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5814 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5815 		(def != XML_ATTRIBUTE_REQUIRED)) {
5816 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5817 	    }
5818 	    if (ctxt->sax2) {
5819 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5820 	    }
5821 	    if (defaultValue != NULL)
5822 	        xmlFree(defaultValue);
5823 	    GROW;
5824 	}
5825 	if (RAW == '>') {
5826 	    if (input != ctxt->input) {
5827 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5828     "Attribute list declaration doesn't start and stop in the same entity\n",
5829                                  NULL, NULL);
5830 	    }
5831 	    NEXT;
5832 	}
5833     }
5834 }
5835 
5836 /**
5837  * xmlParseElementMixedContentDecl:
5838  * @ctxt:  an XML parser context
5839  * @inputchk:  the input used for the current entity, needed for boundary checks
5840  *
5841  * parse the declaration for a Mixed Element content
5842  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5843  *
5844  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5845  *                '(' S? '#PCDATA' S? ')'
5846  *
5847  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5848  *
5849  * [ VC: No Duplicate Types ]
5850  * The same name must not appear more than once in a single
5851  * mixed-content declaration.
5852  *
5853  * returns: the list of the xmlElementContentPtr describing the element choices
5854  */
5855 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5856 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5857     xmlElementContentPtr ret = NULL, cur = NULL, n;
5858     const xmlChar *elem = NULL;
5859 
5860     GROW;
5861     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5862 	SKIP(7);
5863 	SKIP_BLANKS;
5864 	SHRINK;
5865 	if (RAW == ')') {
5866 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5867 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5868 "Element content declaration doesn't start and stop in the same entity\n",
5869                                  NULL, NULL);
5870 	    }
5871 	    NEXT;
5872 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5873 	    if (ret == NULL)
5874 	        return(NULL);
5875 	    if (RAW == '*') {
5876 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
5877 		NEXT;
5878 	    }
5879 	    return(ret);
5880 	}
5881 	if ((RAW == '(') || (RAW == '|')) {
5882 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5883 	    if (ret == NULL) return(NULL);
5884 	}
5885 	while (RAW == '|') {
5886 	    NEXT;
5887 	    if (elem == NULL) {
5888 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5889 		if (ret == NULL) return(NULL);
5890 		ret->c1 = cur;
5891 		if (cur != NULL)
5892 		    cur->parent = ret;
5893 		cur = ret;
5894 	    } else {
5895 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5896 		if (n == NULL) return(NULL);
5897 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5898 		if (n->c1 != NULL)
5899 		    n->c1->parent = n;
5900 	        cur->c2 = n;
5901 		if (n != NULL)
5902 		    n->parent = cur;
5903 		cur = n;
5904 	    }
5905 	    SKIP_BLANKS;
5906 	    elem = xmlParseName(ctxt);
5907 	    if (elem == NULL) {
5908 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5909 			"xmlParseElementMixedContentDecl : Name expected\n");
5910 		xmlFreeDocElementContent(ctxt->myDoc, cur);
5911 		return(NULL);
5912 	    }
5913 	    SKIP_BLANKS;
5914 	    GROW;
5915 	}
5916 	if ((RAW == ')') && (NXT(1) == '*')) {
5917 	    if (elem != NULL) {
5918 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5919 		                               XML_ELEMENT_CONTENT_ELEMENT);
5920 		if (cur->c2 != NULL)
5921 		    cur->c2->parent = cur;
5922             }
5923             if (ret != NULL)
5924                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5925 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5926 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5927 "Element content declaration doesn't start and stop in the same entity\n",
5928 				 NULL, NULL);
5929 	    }
5930 	    SKIP(2);
5931 	} else {
5932 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
5933 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5934 	    return(NULL);
5935 	}
5936 
5937     } else {
5938 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5939     }
5940     return(ret);
5941 }
5942 
5943 /**
5944  * xmlParseElementChildrenContentDeclPriv:
5945  * @ctxt:  an XML parser context
5946  * @inputchk:  the input used for the current entity, needed for boundary checks
5947  * @depth: the level of recursion
5948  *
5949  * parse the declaration for a Mixed Element content
5950  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5951  *
5952  *
5953  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5954  *
5955  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5956  *
5957  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5958  *
5959  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5960  *
5961  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5962  * TODO Parameter-entity replacement text must be properly nested
5963  *	with parenthesized groups. That is to say, if either of the
5964  *	opening or closing parentheses in a choice, seq, or Mixed
5965  *	construct is contained in the replacement text for a parameter
5966  *	entity, both must be contained in the same replacement text. For
5967  *	interoperability, if a parameter-entity reference appears in a
5968  *	choice, seq, or Mixed construct, its replacement text should not
5969  *	be empty, and neither the first nor last non-blank character of
5970  *	the replacement text should be a connector (| or ,).
5971  *
5972  * Returns the tree of xmlElementContentPtr describing the element
5973  *          hierarchy.
5974  */
5975 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)5976 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5977                                        int depth) {
5978     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5979     const xmlChar *elem;
5980     xmlChar type = 0;
5981 
5982     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5983         (depth >  2048)) {
5984         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5985 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5986                           depth);
5987 	return(NULL);
5988     }
5989     SKIP_BLANKS;
5990     GROW;
5991     if (RAW == '(') {
5992 	int inputid = ctxt->input->id;
5993 
5994         /* Recurse on first child */
5995 	NEXT;
5996 	SKIP_BLANKS;
5997         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5998                                                            depth + 1);
5999 	SKIP_BLANKS;
6000 	GROW;
6001     } else {
6002 	elem = xmlParseName(ctxt);
6003 	if (elem == NULL) {
6004 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6005 	    return(NULL);
6006 	}
6007         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6008 	if (cur == NULL) {
6009 	    xmlErrMemory(ctxt, NULL);
6010 	    return(NULL);
6011 	}
6012 	GROW;
6013 	if (RAW == '?') {
6014 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6015 	    NEXT;
6016 	} else if (RAW == '*') {
6017 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6018 	    NEXT;
6019 	} else if (RAW == '+') {
6020 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6021 	    NEXT;
6022 	} else {
6023 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6024 	}
6025 	GROW;
6026     }
6027     SKIP_BLANKS;
6028     SHRINK;
6029     while (RAW != ')') {
6030         /*
6031 	 * Each loop we parse one separator and one element.
6032 	 */
6033         if (RAW == ',') {
6034 	    if (type == 0) type = CUR;
6035 
6036 	    /*
6037 	     * Detect "Name | Name , Name" error
6038 	     */
6039 	    else if (type != CUR) {
6040 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6041 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6042 		                  type);
6043 		if ((last != NULL) && (last != ret))
6044 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6045 		if (ret != NULL)
6046 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6047 		return(NULL);
6048 	    }
6049 	    NEXT;
6050 
6051 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6052 	    if (op == NULL) {
6053 		if ((last != NULL) && (last != ret))
6054 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6055 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6056 		return(NULL);
6057 	    }
6058 	    if (last == NULL) {
6059 		op->c1 = ret;
6060 		if (ret != NULL)
6061 		    ret->parent = op;
6062 		ret = cur = op;
6063 	    } else {
6064 	        cur->c2 = op;
6065 		if (op != NULL)
6066 		    op->parent = cur;
6067 		op->c1 = last;
6068 		if (last != NULL)
6069 		    last->parent = op;
6070 		cur =op;
6071 		last = NULL;
6072 	    }
6073 	} else if (RAW == '|') {
6074 	    if (type == 0) type = CUR;
6075 
6076 	    /*
6077 	     * Detect "Name , Name | Name" error
6078 	     */
6079 	    else if (type != CUR) {
6080 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6081 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6082 				  type);
6083 		if ((last != NULL) && (last != ret))
6084 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6085 		if (ret != NULL)
6086 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6087 		return(NULL);
6088 	    }
6089 	    NEXT;
6090 
6091 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6092 	    if (op == NULL) {
6093 		if ((last != NULL) && (last != ret))
6094 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6095 		if (ret != NULL)
6096 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6097 		return(NULL);
6098 	    }
6099 	    if (last == NULL) {
6100 		op->c1 = ret;
6101 		if (ret != NULL)
6102 		    ret->parent = op;
6103 		ret = cur = op;
6104 	    } else {
6105 	        cur->c2 = op;
6106 		if (op != NULL)
6107 		    op->parent = cur;
6108 		op->c1 = last;
6109 		if (last != NULL)
6110 		    last->parent = op;
6111 		cur =op;
6112 		last = NULL;
6113 	    }
6114 	} else {
6115 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6116 	    if ((last != NULL) && (last != ret))
6117 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6118 	    if (ret != NULL)
6119 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6120 	    return(NULL);
6121 	}
6122 	GROW;
6123 	SKIP_BLANKS;
6124 	GROW;
6125 	if (RAW == '(') {
6126 	    int inputid = ctxt->input->id;
6127 	    /* Recurse on second child */
6128 	    NEXT;
6129 	    SKIP_BLANKS;
6130 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6131                                                           depth + 1);
6132 	    SKIP_BLANKS;
6133 	} else {
6134 	    elem = xmlParseName(ctxt);
6135 	    if (elem == NULL) {
6136 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6137 		if (ret != NULL)
6138 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6139 		return(NULL);
6140 	    }
6141 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6142 	    if (last == NULL) {
6143 		if (ret != NULL)
6144 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6145 		return(NULL);
6146 	    }
6147 	    if (RAW == '?') {
6148 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6149 		NEXT;
6150 	    } else if (RAW == '*') {
6151 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6152 		NEXT;
6153 	    } else if (RAW == '+') {
6154 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6155 		NEXT;
6156 	    } else {
6157 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6158 	    }
6159 	}
6160 	SKIP_BLANKS;
6161 	GROW;
6162     }
6163     if ((cur != NULL) && (last != NULL)) {
6164         cur->c2 = last;
6165 	if (last != NULL)
6166 	    last->parent = cur;
6167     }
6168     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6169 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6170 "Element content declaration doesn't start and stop in the same entity\n",
6171 			 NULL, NULL);
6172     }
6173     NEXT;
6174     if (RAW == '?') {
6175 	if (ret != NULL) {
6176 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6177 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6178 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 	    else
6180 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6181 	}
6182 	NEXT;
6183     } else if (RAW == '*') {
6184 	if (ret != NULL) {
6185 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6186 	    cur = ret;
6187 	    /*
6188 	     * Some normalization:
6189 	     * (a | b* | c?)* == (a | b | c)*
6190 	     */
6191 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6192 		if ((cur->c1 != NULL) &&
6193 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6194 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6195 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6196 		if ((cur->c2 != NULL) &&
6197 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6199 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6200 		cur = cur->c2;
6201 	    }
6202 	}
6203 	NEXT;
6204     } else if (RAW == '+') {
6205 	if (ret != NULL) {
6206 	    int found = 0;
6207 
6208 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6209 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6210 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6211 	    else
6212 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6213 	    /*
6214 	     * Some normalization:
6215 	     * (a | b*)+ == (a | b)*
6216 	     * (a | b?)+ == (a | b)*
6217 	     */
6218 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6219 		if ((cur->c1 != NULL) &&
6220 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6221 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6222 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6223 		    found = 1;
6224 		}
6225 		if ((cur->c2 != NULL) &&
6226 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6227 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6228 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6229 		    found = 1;
6230 		}
6231 		cur = cur->c2;
6232 	    }
6233 	    if (found)
6234 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6235 	}
6236 	NEXT;
6237     }
6238     return(ret);
6239 }
6240 
6241 /**
6242  * xmlParseElementChildrenContentDecl:
6243  * @ctxt:  an XML parser context
6244  * @inputchk:  the input used for the current entity, needed for boundary checks
6245  *
6246  * parse the declaration for a Mixed Element content
6247  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248  *
6249  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6250  *
6251  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6252  *
6253  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6254  *
6255  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6256  *
6257  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6258  * TODO Parameter-entity replacement text must be properly nested
6259  *	with parenthesized groups. That is to say, if either of the
6260  *	opening or closing parentheses in a choice, seq, or Mixed
6261  *	construct is contained in the replacement text for a parameter
6262  *	entity, both must be contained in the same replacement text. For
6263  *	interoperability, if a parameter-entity reference appears in a
6264  *	choice, seq, or Mixed construct, its replacement text should not
6265  *	be empty, and neither the first nor last non-blank character of
6266  *	the replacement text should be a connector (| or ,).
6267  *
6268  * Returns the tree of xmlElementContentPtr describing the element
6269  *          hierarchy.
6270  */
6271 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6272 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6273     /* stub left for API/ABI compat */
6274     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6275 }
6276 
6277 /**
6278  * xmlParseElementContentDecl:
6279  * @ctxt:  an XML parser context
6280  * @name:  the name of the element being defined.
6281  * @result:  the Element Content pointer will be stored here if any
6282  *
6283  * parse the declaration for an Element content either Mixed or Children,
6284  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6285  *
6286  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6287  *
6288  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6289  */
6290 
6291 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6292 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6293                            xmlElementContentPtr *result) {
6294 
6295     xmlElementContentPtr tree = NULL;
6296     int inputid = ctxt->input->id;
6297     int res;
6298 
6299     *result = NULL;
6300 
6301     if (RAW != '(') {
6302 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6303 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6304 	return(-1);
6305     }
6306     NEXT;
6307     GROW;
6308     SKIP_BLANKS;
6309     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6310         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6311 	res = XML_ELEMENT_TYPE_MIXED;
6312     } else {
6313         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6314 	res = XML_ELEMENT_TYPE_ELEMENT;
6315     }
6316     SKIP_BLANKS;
6317     *result = tree;
6318     return(res);
6319 }
6320 
6321 /**
6322  * xmlParseElementDecl:
6323  * @ctxt:  an XML parser context
6324  *
6325  * parse an Element declaration.
6326  *
6327  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6328  *
6329  * [ VC: Unique Element Type Declaration ]
6330  * No element type may be declared more than once
6331  *
6332  * Returns the type of the element, or -1 in case of error
6333  */
6334 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6335 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6336     const xmlChar *name;
6337     int ret = -1;
6338     xmlElementContentPtr content  = NULL;
6339 
6340     /* GROW; done in the caller */
6341     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6342 	xmlParserInputPtr input = ctxt->input;
6343 
6344 	SKIP(9);
6345 	if (!IS_BLANK_CH(CUR)) {
6346 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6347 		           "Space required after 'ELEMENT'\n");
6348 	}
6349         SKIP_BLANKS;
6350         name = xmlParseName(ctxt);
6351 	if (name == NULL) {
6352 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6353 			   "xmlParseElementDecl: no name for Element\n");
6354 	    return(-1);
6355 	}
6356 	while ((RAW == 0) && (ctxt->inputNr > 1))
6357 	    xmlPopInput(ctxt);
6358 	if (!IS_BLANK_CH(CUR)) {
6359 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360 			   "Space required after the element name\n");
6361 	}
6362         SKIP_BLANKS;
6363 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6364 	    SKIP(5);
6365 	    /*
6366 	     * Element must always be empty.
6367 	     */
6368 	    ret = XML_ELEMENT_TYPE_EMPTY;
6369 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6370 	           (NXT(2) == 'Y')) {
6371 	    SKIP(3);
6372 	    /*
6373 	     * Element is a generic container.
6374 	     */
6375 	    ret = XML_ELEMENT_TYPE_ANY;
6376 	} else if (RAW == '(') {
6377 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6378 	} else {
6379 	    /*
6380 	     * [ WFC: PEs in Internal Subset ] error handling.
6381 	     */
6382 	    if ((RAW == '%') && (ctxt->external == 0) &&
6383 	        (ctxt->inputNr == 1)) {
6384 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6385 	  "PEReference: forbidden within markup decl in internal subset\n");
6386 	    } else {
6387 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6388 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6389             }
6390 	    return(-1);
6391 	}
6392 
6393 	SKIP_BLANKS;
6394 	/*
6395 	 * Pop-up of finished entities.
6396 	 */
6397 	while ((RAW == 0) && (ctxt->inputNr > 1))
6398 	    xmlPopInput(ctxt);
6399 	SKIP_BLANKS;
6400 
6401 	if (RAW != '>') {
6402 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6403 	    if (content != NULL) {
6404 		xmlFreeDocElementContent(ctxt->myDoc, content);
6405 	    }
6406 	} else {
6407 	    if (input != ctxt->input) {
6408 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6409     "Element declaration doesn't start and stop in the same entity\n");
6410 	    }
6411 
6412 	    NEXT;
6413 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6414 		(ctxt->sax->elementDecl != NULL)) {
6415 		if (content != NULL)
6416 		    content->parent = NULL;
6417 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6418 		                       content);
6419 		if ((content != NULL) && (content->parent == NULL)) {
6420 		    /*
6421 		     * this is a trick: if xmlAddElementDecl is called,
6422 		     * instead of copying the full tree it is plugged directly
6423 		     * if called from the parser. Avoid duplicating the
6424 		     * interfaces or change the API/ABI
6425 		     */
6426 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6427 		}
6428 	    } else if (content != NULL) {
6429 		xmlFreeDocElementContent(ctxt->myDoc, content);
6430 	    }
6431 	}
6432     }
6433     return(ret);
6434 }
6435 
6436 /**
6437  * xmlParseConditionalSections
6438  * @ctxt:  an XML parser context
6439  *
6440  * [61] conditionalSect ::= includeSect | ignoreSect
6441  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6442  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6443  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6444  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6445  */
6446 
6447 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6448 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6449     int id = ctxt->input->id;
6450 
6451     SKIP(3);
6452     SKIP_BLANKS;
6453     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6454 	SKIP(7);
6455 	SKIP_BLANKS;
6456 	if (RAW != '[') {
6457 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6458 	} else {
6459 	    if (ctxt->input->id != id) {
6460 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6461 	    "All markup of the conditional section is not in the same entity\n",
6462 				     NULL, NULL);
6463 	    }
6464 	    NEXT;
6465 	}
6466 	if (xmlParserDebugEntities) {
6467 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6468 		xmlGenericError(xmlGenericErrorContext,
6469 			"%s(%d): ", ctxt->input->filename,
6470 			ctxt->input->line);
6471 	    xmlGenericError(xmlGenericErrorContext,
6472 		    "Entering INCLUDE Conditional Section\n");
6473 	}
6474 
6475 	while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6476 	       (NXT(2) != '>'))) {
6477 	    const xmlChar *check = CUR_PTR;
6478 	    unsigned int cons = ctxt->input->consumed;
6479 
6480 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6481 		xmlParseConditionalSections(ctxt);
6482 	    } else if (IS_BLANK_CH(CUR)) {
6483 		NEXT;
6484 	    } else if (RAW == '%') {
6485 		xmlParsePEReference(ctxt);
6486 	    } else
6487 		xmlParseMarkupDecl(ctxt);
6488 
6489 	    /*
6490 	     * Pop-up of finished entities.
6491 	     */
6492 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6493 		xmlPopInput(ctxt);
6494 
6495 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6496 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6497 		break;
6498 	    }
6499 	}
6500 	if (xmlParserDebugEntities) {
6501 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6502 		xmlGenericError(xmlGenericErrorContext,
6503 			"%s(%d): ", ctxt->input->filename,
6504 			ctxt->input->line);
6505 	    xmlGenericError(xmlGenericErrorContext,
6506 		    "Leaving INCLUDE Conditional Section\n");
6507 	}
6508 
6509     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6510 	int state;
6511 	xmlParserInputState instate;
6512 	int depth = 0;
6513 
6514 	SKIP(6);
6515 	SKIP_BLANKS;
6516 	if (RAW != '[') {
6517 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6518 	} else {
6519 	    if (ctxt->input->id != id) {
6520 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6521 	    "All markup of the conditional section is not in the same entity\n",
6522 				     NULL, NULL);
6523 	    }
6524 	    NEXT;
6525 	}
6526 	if (xmlParserDebugEntities) {
6527 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6528 		xmlGenericError(xmlGenericErrorContext,
6529 			"%s(%d): ", ctxt->input->filename,
6530 			ctxt->input->line);
6531 	    xmlGenericError(xmlGenericErrorContext,
6532 		    "Entering IGNORE Conditional Section\n");
6533 	}
6534 
6535 	/*
6536 	 * Parse up to the end of the conditional section
6537 	 * But disable SAX event generating DTD building in the meantime
6538 	 */
6539 	state = ctxt->disableSAX;
6540 	instate = ctxt->instate;
6541 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6542 	ctxt->instate = XML_PARSER_IGNORE;
6543 
6544 	while ((depth >= 0) && (RAW != 0)) {
6545 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6546 	    depth++;
6547 	    SKIP(3);
6548 	    continue;
6549 	  }
6550 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6551 	    if (--depth >= 0) SKIP(3);
6552 	    continue;
6553 	  }
6554 	  NEXT;
6555 	  continue;
6556 	}
6557 
6558 	ctxt->disableSAX = state;
6559 	ctxt->instate = instate;
6560 
6561 	if (xmlParserDebugEntities) {
6562 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6563 		xmlGenericError(xmlGenericErrorContext,
6564 			"%s(%d): ", ctxt->input->filename,
6565 			ctxt->input->line);
6566 	    xmlGenericError(xmlGenericErrorContext,
6567 		    "Leaving IGNORE Conditional Section\n");
6568 	}
6569 
6570     } else {
6571 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6572     }
6573 
6574     if (RAW == 0)
6575         SHRINK;
6576 
6577     if (RAW == 0) {
6578 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6579     } else {
6580 	if (ctxt->input->id != id) {
6581 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6582 	"All markup of the conditional section is not in the same entity\n",
6583 				 NULL, NULL);
6584 	}
6585         SKIP(3);
6586     }
6587 }
6588 
6589 /**
6590  * xmlParseMarkupDecl:
6591  * @ctxt:  an XML parser context
6592  *
6593  * parse Markup declarations
6594  *
6595  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6596  *                     NotationDecl | PI | Comment
6597  *
6598  * [ VC: Proper Declaration/PE Nesting ]
6599  * Parameter-entity replacement text must be properly nested with
6600  * markup declarations. That is to say, if either the first character
6601  * or the last character of a markup declaration (markupdecl above) is
6602  * contained in the replacement text for a parameter-entity reference,
6603  * both must be contained in the same replacement text.
6604  *
6605  * [ WFC: PEs in Internal Subset ]
6606  * In the internal DTD subset, parameter-entity references can occur
6607  * only where markup declarations can occur, not within markup declarations.
6608  * (This does not apply to references that occur in external parameter
6609  * entities or to the external subset.)
6610  */
6611 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6612 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6613     GROW;
6614     if (CUR == '<') {
6615         if (NXT(1) == '!') {
6616 	    switch (NXT(2)) {
6617 	        case 'E':
6618 		    if (NXT(3) == 'L')
6619 			xmlParseElementDecl(ctxt);
6620 		    else if (NXT(3) == 'N')
6621 			xmlParseEntityDecl(ctxt);
6622 		    break;
6623 	        case 'A':
6624 		    xmlParseAttributeListDecl(ctxt);
6625 		    break;
6626 	        case 'N':
6627 		    xmlParseNotationDecl(ctxt);
6628 		    break;
6629 	        case '-':
6630 		    xmlParseComment(ctxt);
6631 		    break;
6632 		default:
6633 		    /* there is an error but it will be detected later */
6634 		    break;
6635 	    }
6636 	} else if (NXT(1) == '?') {
6637 	    xmlParsePI(ctxt);
6638 	}
6639     }
6640     /*
6641      * This is only for internal subset. On external entities,
6642      * the replacement is done before parsing stage
6643      */
6644     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6645 	xmlParsePEReference(ctxt);
6646 
6647     /*
6648      * Conditional sections are allowed from entities included
6649      * by PE References in the internal subset.
6650      */
6651     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6652         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6653 	    xmlParseConditionalSections(ctxt);
6654 	}
6655     }
6656 
6657     ctxt->instate = XML_PARSER_DTD;
6658 }
6659 
6660 /**
6661  * xmlParseTextDecl:
6662  * @ctxt:  an XML parser context
6663  *
6664  * parse an XML declaration header for external entities
6665  *
6666  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6667  */
6668 
6669 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6670 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6671     xmlChar *version;
6672     const xmlChar *encoding;
6673 
6674     /*
6675      * We know that '<?xml' is here.
6676      */
6677     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6678 	SKIP(5);
6679     } else {
6680 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6681 	return;
6682     }
6683 
6684     if (!IS_BLANK_CH(CUR)) {
6685 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6686 		       "Space needed after '<?xml'\n");
6687     }
6688     SKIP_BLANKS;
6689 
6690     /*
6691      * We may have the VersionInfo here.
6692      */
6693     version = xmlParseVersionInfo(ctxt);
6694     if (version == NULL)
6695 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6696     else {
6697 	if (!IS_BLANK_CH(CUR)) {
6698 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 		           "Space needed here\n");
6700 	}
6701     }
6702     ctxt->input->version = version;
6703 
6704     /*
6705      * We must have the encoding declaration
6706      */
6707     encoding = xmlParseEncodingDecl(ctxt);
6708     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6709 	/*
6710 	 * The XML REC instructs us to stop parsing right here
6711 	 */
6712         return;
6713     }
6714     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6715 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6716 		       "Missing encoding in text declaration\n");
6717     }
6718 
6719     SKIP_BLANKS;
6720     if ((RAW == '?') && (NXT(1) == '>')) {
6721         SKIP(2);
6722     } else if (RAW == '>') {
6723         /* Deprecated old WD ... */
6724 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6725 	NEXT;
6726     } else {
6727 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6728 	MOVETO_ENDTAG(CUR_PTR);
6729 	NEXT;
6730     }
6731 }
6732 
6733 /**
6734  * xmlParseExternalSubset:
6735  * @ctxt:  an XML parser context
6736  * @ExternalID: the external identifier
6737  * @SystemID: the system identifier (or URL)
6738  *
6739  * parse Markup declarations from an external subset
6740  *
6741  * [30] extSubset ::= textDecl? extSubsetDecl
6742  *
6743  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6744  */
6745 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6746 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6747                        const xmlChar *SystemID) {
6748     xmlDetectSAX2(ctxt);
6749     GROW;
6750 
6751     if ((ctxt->encoding == NULL) &&
6752         (ctxt->input->end - ctxt->input->cur >= 4)) {
6753         xmlChar start[4];
6754 	xmlCharEncoding enc;
6755 
6756 	start[0] = RAW;
6757 	start[1] = NXT(1);
6758 	start[2] = NXT(2);
6759 	start[3] = NXT(3);
6760 	enc = xmlDetectCharEncoding(start, 4);
6761 	if (enc != XML_CHAR_ENCODING_NONE)
6762 	    xmlSwitchEncoding(ctxt, enc);
6763     }
6764 
6765     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6766 	xmlParseTextDecl(ctxt);
6767 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6768 	    /*
6769 	     * The XML REC instructs us to stop parsing right here
6770 	     */
6771 	    ctxt->instate = XML_PARSER_EOF;
6772 	    return;
6773 	}
6774     }
6775     if (ctxt->myDoc == NULL) {
6776         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6777 	if (ctxt->myDoc == NULL) {
6778 	    xmlErrMemory(ctxt, "New Doc failed");
6779 	    return;
6780 	}
6781 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6782     }
6783     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6784         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6785 
6786     ctxt->instate = XML_PARSER_DTD;
6787     ctxt->external = 1;
6788     while (((RAW == '<') && (NXT(1) == '?')) ||
6789            ((RAW == '<') && (NXT(1) == '!')) ||
6790 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
6791 	const xmlChar *check = CUR_PTR;
6792 	unsigned int cons = ctxt->input->consumed;
6793 
6794 	GROW;
6795         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6796 	    xmlParseConditionalSections(ctxt);
6797 	} else if (IS_BLANK_CH(CUR)) {
6798 	    NEXT;
6799 	} else if (RAW == '%') {
6800             xmlParsePEReference(ctxt);
6801 	} else
6802 	    xmlParseMarkupDecl(ctxt);
6803 
6804 	/*
6805 	 * Pop-up of finished entities.
6806 	 */
6807 	while ((RAW == 0) && (ctxt->inputNr > 1))
6808 	    xmlPopInput(ctxt);
6809 
6810 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6811 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6812 	    break;
6813 	}
6814     }
6815 
6816     if (RAW != 0) {
6817 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6818     }
6819 
6820 }
6821 
6822 /**
6823  * xmlParseReference:
6824  * @ctxt:  an XML parser context
6825  *
6826  * parse and handle entity references in content, depending on the SAX
6827  * interface, this may end-up in a call to character() if this is a
6828  * CharRef, a predefined entity, if there is no reference() callback.
6829  * or if the parser was asked to switch to that mode.
6830  *
6831  * [67] Reference ::= EntityRef | CharRef
6832  */
6833 void
xmlParseReference(xmlParserCtxtPtr ctxt)6834 xmlParseReference(xmlParserCtxtPtr ctxt) {
6835     xmlEntityPtr ent;
6836     xmlChar *val;
6837     int was_checked;
6838     xmlNodePtr list = NULL;
6839     xmlParserErrors ret = XML_ERR_OK;
6840 
6841 
6842     if (RAW != '&')
6843         return;
6844 
6845     /*
6846      * Simple case of a CharRef
6847      */
6848     if (NXT(1) == '#') {
6849 	int i = 0;
6850 	xmlChar out[10];
6851 	int hex = NXT(2);
6852 	int value = xmlParseCharRef(ctxt);
6853 
6854 	if (value == 0)
6855 	    return;
6856 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6857 	    /*
6858 	     * So we are using non-UTF-8 buffers
6859 	     * Check that the char fit on 8bits, if not
6860 	     * generate a CharRef.
6861 	     */
6862 	    if (value <= 0xFF) {
6863 		out[0] = value;
6864 		out[1] = 0;
6865 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6866 		    (!ctxt->disableSAX))
6867 		    ctxt->sax->characters(ctxt->userData, out, 1);
6868 	    } else {
6869 		if ((hex == 'x') || (hex == 'X'))
6870 		    snprintf((char *)out, sizeof(out), "#x%X", value);
6871 		else
6872 		    snprintf((char *)out, sizeof(out), "#%d", value);
6873 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6874 		    (!ctxt->disableSAX))
6875 		    ctxt->sax->reference(ctxt->userData, out);
6876 	    }
6877 	} else {
6878 	    /*
6879 	     * Just encode the value in UTF-8
6880 	     */
6881 	    COPY_BUF(0 ,out, i, value);
6882 	    out[i] = 0;
6883 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6884 		(!ctxt->disableSAX))
6885 		ctxt->sax->characters(ctxt->userData, out, i);
6886 	}
6887 	return;
6888     }
6889 
6890     /*
6891      * We are seeing an entity reference
6892      */
6893     ent = xmlParseEntityRef(ctxt);
6894     if (ent == NULL) return;
6895     if (!ctxt->wellFormed)
6896 	return;
6897     was_checked = ent->checked;
6898 
6899     /* special case of predefined entities */
6900     if ((ent->name == NULL) ||
6901         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6902 	val = ent->content;
6903 	if (val == NULL) return;
6904 	/*
6905 	 * inline the entity.
6906 	 */
6907 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6908 	    (!ctxt->disableSAX))
6909 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6910 	return;
6911     }
6912 
6913     /*
6914      * The first reference to the entity trigger a parsing phase
6915      * where the ent->children is filled with the result from
6916      * the parsing.
6917      */
6918     if (ent->checked == 0) {
6919 	unsigned long oldnbent = ctxt->nbentities;
6920 
6921 	/*
6922 	 * This is a bit hackish but this seems the best
6923 	 * way to make sure both SAX and DOM entity support
6924 	 * behaves okay.
6925 	 */
6926 	void *user_data;
6927 	if (ctxt->userData == ctxt)
6928 	    user_data = NULL;
6929 	else
6930 	    user_data = ctxt->userData;
6931 
6932 	/*
6933 	 * Check that this entity is well formed
6934 	 * 4.3.2: An internal general parsed entity is well-formed
6935 	 * if its replacement text matches the production labeled
6936 	 * content.
6937 	 */
6938 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6939 	    ctxt->depth++;
6940 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6941 	                                              user_data, &list);
6942 	    ctxt->depth--;
6943 
6944 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6945 	    ctxt->depth++;
6946 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6947 	                                   user_data, ctxt->depth, ent->URI,
6948 					   ent->ExternalID, &list);
6949 	    ctxt->depth--;
6950 	} else {
6951 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
6952 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6953 			 "invalid entity type found\n", NULL);
6954 	}
6955 
6956 	/*
6957 	 * Store the number of entities needing parsing for this entity
6958 	 * content and do checkings
6959 	 */
6960 	ent->checked = ctxt->nbentities - oldnbent;
6961 	if (ret == XML_ERR_ENTITY_LOOP) {
6962 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6963 	    xmlFreeNodeList(list);
6964 	    return;
6965 	}
6966 	if (xmlParserEntityCheck(ctxt, 0, ent)) {
6967 	    xmlFreeNodeList(list);
6968 	    return;
6969 	}
6970 
6971 	if ((ret == XML_ERR_OK) && (list != NULL)) {
6972 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6973 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6974 		(ent->children == NULL)) {
6975 		ent->children = list;
6976 		if (ctxt->replaceEntities) {
6977 		    /*
6978 		     * Prune it directly in the generated document
6979 		     * except for single text nodes.
6980 		     */
6981 		    if (((list->type == XML_TEXT_NODE) &&
6982 			 (list->next == NULL)) ||
6983 			(ctxt->parseMode == XML_PARSE_READER)) {
6984 			list->parent = (xmlNodePtr) ent;
6985 			list = NULL;
6986 			ent->owner = 1;
6987 		    } else {
6988 			ent->owner = 0;
6989 			while (list != NULL) {
6990 			    list->parent = (xmlNodePtr) ctxt->node;
6991 			    list->doc = ctxt->myDoc;
6992 			    if (list->next == NULL)
6993 				ent->last = list;
6994 			    list = list->next;
6995 			}
6996 			list = ent->children;
6997 #ifdef LIBXML_LEGACY_ENABLED
6998 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6999 			  xmlAddEntityReference(ent, list, NULL);
7000 #endif /* LIBXML_LEGACY_ENABLED */
7001 		    }
7002 		} else {
7003 		    ent->owner = 1;
7004 		    while (list != NULL) {
7005 			list->parent = (xmlNodePtr) ent;
7006 			xmlSetTreeDoc(list, ent->doc);
7007 			if (list->next == NULL)
7008 			    ent->last = list;
7009 			list = list->next;
7010 		    }
7011 		}
7012 	    } else {
7013 		xmlFreeNodeList(list);
7014 		list = NULL;
7015 	    }
7016 	} else if ((ret != XML_ERR_OK) &&
7017 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
7018 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7019 		     "Entity '%s' failed to parse\n", ent->name);
7020 	} else if (list != NULL) {
7021 	    xmlFreeNodeList(list);
7022 	    list = NULL;
7023 	}
7024 	if (ent->checked == 0)
7025 	    ent->checked = 1;
7026     } else if (ent->checked != 1) {
7027 	ctxt->nbentities += ent->checked;
7028     }
7029 
7030     /*
7031      * Now that the entity content has been gathered
7032      * provide it to the application, this can take different forms based
7033      * on the parsing modes.
7034      */
7035     if (ent->children == NULL) {
7036 	/*
7037 	 * Probably running in SAX mode and the callbacks don't
7038 	 * build the entity content. So unless we already went
7039 	 * though parsing for first checking go though the entity
7040 	 * content to generate callbacks associated to the entity
7041 	 */
7042 	if (was_checked != 0) {
7043 	    void *user_data;
7044 	    /*
7045 	     * This is a bit hackish but this seems the best
7046 	     * way to make sure both SAX and DOM entity support
7047 	     * behaves okay.
7048 	     */
7049 	    if (ctxt->userData == ctxt)
7050 		user_data = NULL;
7051 	    else
7052 		user_data = ctxt->userData;
7053 
7054 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7055 		ctxt->depth++;
7056 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7057 				   ent->content, user_data, NULL);
7058 		ctxt->depth--;
7059 	    } else if (ent->etype ==
7060 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7061 		ctxt->depth++;
7062 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7063 			   ctxt->sax, user_data, ctxt->depth,
7064 			   ent->URI, ent->ExternalID, NULL);
7065 		ctxt->depth--;
7066 	    } else {
7067 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7068 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7069 			     "invalid entity type found\n", NULL);
7070 	    }
7071 	    if (ret == XML_ERR_ENTITY_LOOP) {
7072 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7073 		return;
7074 	    }
7075 	}
7076 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7077 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7078 	    /*
7079 	     * Entity reference callback comes second, it's somewhat
7080 	     * superfluous but a compatibility to historical behaviour
7081 	     */
7082 	    ctxt->sax->reference(ctxt->userData, ent->name);
7083 	}
7084 	return;
7085     }
7086 
7087     /*
7088      * If we didn't get any children for the entity being built
7089      */
7090     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7091 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7092 	/*
7093 	 * Create a node.
7094 	 */
7095 	ctxt->sax->reference(ctxt->userData, ent->name);
7096 	return;
7097     }
7098 
7099     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7100 	/*
7101 	 * There is a problem on the handling of _private for entities
7102 	 * (bug 155816): Should we copy the content of the field from
7103 	 * the entity (possibly overwriting some value set by the user
7104 	 * when a copy is created), should we leave it alone, or should
7105 	 * we try to take care of different situations?  The problem
7106 	 * is exacerbated by the usage of this field by the xmlReader.
7107 	 * To fix this bug, we look at _private on the created node
7108 	 * and, if it's NULL, we copy in whatever was in the entity.
7109 	 * If it's not NULL we leave it alone.  This is somewhat of a
7110 	 * hack - maybe we should have further tests to determine
7111 	 * what to do.
7112 	 */
7113 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7114 	    /*
7115 	     * Seems we are generating the DOM content, do
7116 	     * a simple tree copy for all references except the first
7117 	     * In the first occurrence list contains the replacement.
7118 	     * progressive == 2 means we are operating on the Reader
7119 	     * and since nodes are discarded we must copy all the time.
7120 	     */
7121 	    if (((list == NULL) && (ent->owner == 0)) ||
7122 		(ctxt->parseMode == XML_PARSE_READER)) {
7123 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7124 
7125 		/*
7126 		 * when operating on a reader, the entities definitions
7127 		 * are always owning the entities subtree.
7128 		if (ctxt->parseMode == XML_PARSE_READER)
7129 		    ent->owner = 1;
7130 		 */
7131 
7132 		cur = ent->children;
7133 		while (cur != NULL) {
7134 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7135 		    if (nw != NULL) {
7136 			if (nw->_private == NULL)
7137 			    nw->_private = cur->_private;
7138 			if (firstChild == NULL){
7139 			    firstChild = nw;
7140 			}
7141 			nw = xmlAddChild(ctxt->node, nw);
7142 		    }
7143 		    if (cur == ent->last) {
7144 			/*
7145 			 * needed to detect some strange empty
7146 			 * node cases in the reader tests
7147 			 */
7148 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7149 			    (nw != NULL) &&
7150 			    (nw->type == XML_ELEMENT_NODE) &&
7151 			    (nw->children == NULL))
7152 			    nw->extra = 1;
7153 
7154 			break;
7155 		    }
7156 		    cur = cur->next;
7157 		}
7158 #ifdef LIBXML_LEGACY_ENABLED
7159 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7160 		  xmlAddEntityReference(ent, firstChild, nw);
7161 #endif /* LIBXML_LEGACY_ENABLED */
7162 	    } else if (list == NULL) {
7163 		xmlNodePtr nw = NULL, cur, next, last,
7164 			   firstChild = NULL;
7165 		/*
7166 		 * Copy the entity child list and make it the new
7167 		 * entity child list. The goal is to make sure any
7168 		 * ID or REF referenced will be the one from the
7169 		 * document content and not the entity copy.
7170 		 */
7171 		cur = ent->children;
7172 		ent->children = NULL;
7173 		last = ent->last;
7174 		ent->last = NULL;
7175 		while (cur != NULL) {
7176 		    next = cur->next;
7177 		    cur->next = NULL;
7178 		    cur->parent = NULL;
7179 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7180 		    if (nw != NULL) {
7181 			if (nw->_private == NULL)
7182 			    nw->_private = cur->_private;
7183 			if (firstChild == NULL){
7184 			    firstChild = cur;
7185 			}
7186 			xmlAddChild((xmlNodePtr) ent, nw);
7187 			xmlAddChild(ctxt->node, cur);
7188 		    }
7189 		    if (cur == last)
7190 			break;
7191 		    cur = next;
7192 		}
7193 		if (ent->owner == 0)
7194 		    ent->owner = 1;
7195 #ifdef LIBXML_LEGACY_ENABLED
7196 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197 		  xmlAddEntityReference(ent, firstChild, nw);
7198 #endif /* LIBXML_LEGACY_ENABLED */
7199 	    } else {
7200 		const xmlChar *nbktext;
7201 
7202 		/*
7203 		 * the name change is to avoid coalescing of the
7204 		 * node with a possible previous text one which
7205 		 * would make ent->children a dangling pointer
7206 		 */
7207 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7208 					-1);
7209 		if (ent->children->type == XML_TEXT_NODE)
7210 		    ent->children->name = nbktext;
7211 		if ((ent->last != ent->children) &&
7212 		    (ent->last->type == XML_TEXT_NODE))
7213 		    ent->last->name = nbktext;
7214 		xmlAddChildList(ctxt->node, ent->children);
7215 	    }
7216 
7217 	    /*
7218 	     * This is to avoid a nasty side effect, see
7219 	     * characters() in SAX.c
7220 	     */
7221 	    ctxt->nodemem = 0;
7222 	    ctxt->nodelen = 0;
7223 	    return;
7224 	}
7225     }
7226 }
7227 
7228 /**
7229  * xmlParseEntityRef:
7230  * @ctxt:  an XML parser context
7231  *
7232  * parse ENTITY references declarations
7233  *
7234  * [68] EntityRef ::= '&' Name ';'
7235  *
7236  * [ WFC: Entity Declared ]
7237  * In a document without any DTD, a document with only an internal DTD
7238  * subset which contains no parameter entity references, or a document
7239  * with "standalone='yes'", the Name given in the entity reference
7240  * must match that in an entity declaration, except that well-formed
7241  * documents need not declare any of the following entities: amp, lt,
7242  * gt, apos, quot.  The declaration of a parameter entity must precede
7243  * any reference to it.  Similarly, the declaration of a general entity
7244  * must precede any reference to it which appears in a default value in an
7245  * attribute-list declaration. Note that if entities are declared in the
7246  * external subset or in external parameter entities, a non-validating
7247  * processor is not obligated to read and process their declarations;
7248  * for such documents, the rule that an entity must be declared is a
7249  * well-formedness constraint only if standalone='yes'.
7250  *
7251  * [ WFC: Parsed Entity ]
7252  * An entity reference must not contain the name of an unparsed entity
7253  *
7254  * Returns the xmlEntityPtr if found, or NULL otherwise.
7255  */
7256 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7257 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7258     const xmlChar *name;
7259     xmlEntityPtr ent = NULL;
7260 
7261     GROW;
7262 
7263     if (RAW != '&')
7264         return(NULL);
7265     NEXT;
7266     name = xmlParseName(ctxt);
7267     if (name == NULL) {
7268 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7269 		       "xmlParseEntityRef: no name\n");
7270         return(NULL);
7271     }
7272     if (RAW != ';') {
7273 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7274 	return(NULL);
7275     }
7276     NEXT;
7277 
7278     /*
7279      * Predefined entites override any extra definition
7280      */
7281     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7282         ent = xmlGetPredefinedEntity(name);
7283         if (ent != NULL)
7284             return(ent);
7285     }
7286 
7287     /*
7288      * Increate the number of entity references parsed
7289      */
7290     ctxt->nbentities++;
7291 
7292     /*
7293      * Ask first SAX for entity resolution, otherwise try the
7294      * entities which may have stored in the parser context.
7295      */
7296     if (ctxt->sax != NULL) {
7297 	if (ctxt->sax->getEntity != NULL)
7298 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7299 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7300 	    (ctxt->options & XML_PARSE_OLDSAX))
7301 	    ent = xmlGetPredefinedEntity(name);
7302 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7303 	    (ctxt->userData==ctxt)) {
7304 	    ent = xmlSAX2GetEntity(ctxt, name);
7305 	}
7306     }
7307     /*
7308      * [ WFC: Entity Declared ]
7309      * In a document without any DTD, a document with only an
7310      * internal DTD subset which contains no parameter entity
7311      * references, or a document with "standalone='yes'", the
7312      * Name given in the entity reference must match that in an
7313      * entity declaration, except that well-formed documents
7314      * need not declare any of the following entities: amp, lt,
7315      * gt, apos, quot.
7316      * The declaration of a parameter entity must precede any
7317      * reference to it.
7318      * Similarly, the declaration of a general entity must
7319      * precede any reference to it which appears in a default
7320      * value in an attribute-list declaration. Note that if
7321      * entities are declared in the external subset or in
7322      * external parameter entities, a non-validating processor
7323      * is not obligated to read and process their declarations;
7324      * for such documents, the rule that an entity must be
7325      * declared is a well-formedness constraint only if
7326      * standalone='yes'.
7327      */
7328     if (ent == NULL) {
7329 	if ((ctxt->standalone == 1) ||
7330 	    ((ctxt->hasExternalSubset == 0) &&
7331 	     (ctxt->hasPErefs == 0))) {
7332 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7333 		     "Entity '%s' not defined\n", name);
7334 	} else {
7335 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7336 		     "Entity '%s' not defined\n", name);
7337 	    if ((ctxt->inSubset == 0) &&
7338 		(ctxt->sax != NULL) &&
7339 		(ctxt->sax->reference != NULL)) {
7340 		ctxt->sax->reference(ctxt->userData, name);
7341 	    }
7342 	}
7343 	ctxt->valid = 0;
7344     }
7345 
7346     /*
7347      * [ WFC: Parsed Entity ]
7348      * An entity reference must not contain the name of an
7349      * unparsed entity
7350      */
7351     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7352 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7353 		 "Entity reference to unparsed entity %s\n", name);
7354     }
7355 
7356     /*
7357      * [ WFC: No External Entity References ]
7358      * Attribute values cannot contain direct or indirect
7359      * entity references to external entities.
7360      */
7361     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7362 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7363 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7364 	     "Attribute references external entity '%s'\n", name);
7365     }
7366     /*
7367      * [ WFC: No < in Attribute Values ]
7368      * The replacement text of any entity referred to directly or
7369      * indirectly in an attribute value (other than "&lt;") must
7370      * not contain a <.
7371      */
7372     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7373 	     (ent != NULL) && (ent->content != NULL) &&
7374 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7375 	     (xmlStrchr(ent->content, '<'))) {
7376 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7377     "'<' in entity '%s' is not allowed in attributes values\n", name);
7378     }
7379 
7380     /*
7381      * Internal check, no parameter entities here ...
7382      */
7383     else {
7384 	switch (ent->etype) {
7385 	    case XML_INTERNAL_PARAMETER_ENTITY:
7386 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7387 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7388 	     "Attempt to reference the parameter entity '%s'\n",
7389 			      name);
7390 	    break;
7391 	    default:
7392 	    break;
7393 	}
7394     }
7395 
7396     /*
7397      * [ WFC: No Recursion ]
7398      * A parsed entity must not contain a recursive reference
7399      * to itself, either directly or indirectly.
7400      * Done somewhere else
7401      */
7402     return(ent);
7403 }
7404 
7405 /**
7406  * xmlParseStringEntityRef:
7407  * @ctxt:  an XML parser context
7408  * @str:  a pointer to an index in the string
7409  *
7410  * parse ENTITY references declarations, but this version parses it from
7411  * a string value.
7412  *
7413  * [68] EntityRef ::= '&' Name ';'
7414  *
7415  * [ WFC: Entity Declared ]
7416  * In a document without any DTD, a document with only an internal DTD
7417  * subset which contains no parameter entity references, or a document
7418  * with "standalone='yes'", the Name given in the entity reference
7419  * must match that in an entity declaration, except that well-formed
7420  * documents need not declare any of the following entities: amp, lt,
7421  * gt, apos, quot.  The declaration of a parameter entity must precede
7422  * any reference to it.  Similarly, the declaration of a general entity
7423  * must precede any reference to it which appears in a default value in an
7424  * attribute-list declaration. Note that if entities are declared in the
7425  * external subset or in external parameter entities, a non-validating
7426  * processor is not obligated to read and process their declarations;
7427  * for such documents, the rule that an entity must be declared is a
7428  * well-formedness constraint only if standalone='yes'.
7429  *
7430  * [ WFC: Parsed Entity ]
7431  * An entity reference must not contain the name of an unparsed entity
7432  *
7433  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7434  * is updated to the current location in the string.
7435  */
7436 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7437 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7438     xmlChar *name;
7439     const xmlChar *ptr;
7440     xmlChar cur;
7441     xmlEntityPtr ent = NULL;
7442 
7443     if ((str == NULL) || (*str == NULL))
7444         return(NULL);
7445     ptr = *str;
7446     cur = *ptr;
7447     if (cur != '&')
7448 	return(NULL);
7449 
7450     ptr++;
7451     name = xmlParseStringName(ctxt, &ptr);
7452     if (name == NULL) {
7453 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7454 		       "xmlParseStringEntityRef: no name\n");
7455 	*str = ptr;
7456 	return(NULL);
7457     }
7458     if (*ptr != ';') {
7459 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7460         xmlFree(name);
7461 	*str = ptr;
7462 	return(NULL);
7463     }
7464     ptr++;
7465 
7466 
7467     /*
7468      * Predefined entites override any extra definition
7469      */
7470     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7471         ent = xmlGetPredefinedEntity(name);
7472         if (ent != NULL) {
7473             xmlFree(name);
7474             *str = ptr;
7475             return(ent);
7476         }
7477     }
7478 
7479     /*
7480      * Increate the number of entity references parsed
7481      */
7482     ctxt->nbentities++;
7483 
7484     /*
7485      * Ask first SAX for entity resolution, otherwise try the
7486      * entities which may have stored in the parser context.
7487      */
7488     if (ctxt->sax != NULL) {
7489 	if (ctxt->sax->getEntity != NULL)
7490 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7491 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7492 	    ent = xmlGetPredefinedEntity(name);
7493 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7494 	    ent = xmlSAX2GetEntity(ctxt, name);
7495 	}
7496     }
7497 
7498     /*
7499      * [ WFC: Entity Declared ]
7500      * In a document without any DTD, a document with only an
7501      * internal DTD subset which contains no parameter entity
7502      * references, or a document with "standalone='yes'", the
7503      * Name given in the entity reference must match that in an
7504      * entity declaration, except that well-formed documents
7505      * need not declare any of the following entities: amp, lt,
7506      * gt, apos, quot.
7507      * The declaration of a parameter entity must precede any
7508      * reference to it.
7509      * Similarly, the declaration of a general entity must
7510      * precede any reference to it which appears in a default
7511      * value in an attribute-list declaration. Note that if
7512      * entities are declared in the external subset or in
7513      * external parameter entities, a non-validating processor
7514      * is not obligated to read and process their declarations;
7515      * for such documents, the rule that an entity must be
7516      * declared is a well-formedness constraint only if
7517      * standalone='yes'.
7518      */
7519     if (ent == NULL) {
7520 	if ((ctxt->standalone == 1) ||
7521 	    ((ctxt->hasExternalSubset == 0) &&
7522 	     (ctxt->hasPErefs == 0))) {
7523 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7524 		     "Entity '%s' not defined\n", name);
7525 	} else {
7526 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7527 			  "Entity '%s' not defined\n",
7528 			  name);
7529 	}
7530 	/* TODO ? check regressions ctxt->valid = 0; */
7531     }
7532 
7533     /*
7534      * [ WFC: Parsed Entity ]
7535      * An entity reference must not contain the name of an
7536      * unparsed entity
7537      */
7538     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7539 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7540 		 "Entity reference to unparsed entity %s\n", name);
7541     }
7542 
7543     /*
7544      * [ WFC: No External Entity References ]
7545      * Attribute values cannot contain direct or indirect
7546      * entity references to external entities.
7547      */
7548     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7549 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7550 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7551 	 "Attribute references external entity '%s'\n", name);
7552     }
7553     /*
7554      * [ WFC: No < in Attribute Values ]
7555      * The replacement text of any entity referred to directly or
7556      * indirectly in an attribute value (other than "&lt;") must
7557      * not contain a <.
7558      */
7559     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7560 	     (ent != NULL) && (ent->content != NULL) &&
7561 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7562 	     (xmlStrchr(ent->content, '<'))) {
7563 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7564      "'<' in entity '%s' is not allowed in attributes values\n",
7565 			  name);
7566     }
7567 
7568     /*
7569      * Internal check, no parameter entities here ...
7570      */
7571     else {
7572 	switch (ent->etype) {
7573 	    case XML_INTERNAL_PARAMETER_ENTITY:
7574 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7575 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7576 	     "Attempt to reference the parameter entity '%s'\n",
7577 				  name);
7578 	    break;
7579 	    default:
7580 	    break;
7581 	}
7582     }
7583 
7584     /*
7585      * [ WFC: No Recursion ]
7586      * A parsed entity must not contain a recursive reference
7587      * to itself, either directly or indirectly.
7588      * Done somewhere else
7589      */
7590 
7591     xmlFree(name);
7592     *str = ptr;
7593     return(ent);
7594 }
7595 
7596 /**
7597  * xmlParsePEReference:
7598  * @ctxt:  an XML parser context
7599  *
7600  * parse PEReference declarations
7601  * The entity content is handled directly by pushing it's content as
7602  * a new input stream.
7603  *
7604  * [69] PEReference ::= '%' Name ';'
7605  *
7606  * [ WFC: No Recursion ]
7607  * A parsed entity must not contain a recursive
7608  * reference to itself, either directly or indirectly.
7609  *
7610  * [ WFC: Entity Declared ]
7611  * In a document without any DTD, a document with only an internal DTD
7612  * subset which contains no parameter entity references, or a document
7613  * with "standalone='yes'", ...  ... The declaration of a parameter
7614  * entity must precede any reference to it...
7615  *
7616  * [ VC: Entity Declared ]
7617  * In a document with an external subset or external parameter entities
7618  * with "standalone='no'", ...  ... The declaration of a parameter entity
7619  * must precede any reference to it...
7620  *
7621  * [ WFC: In DTD ]
7622  * Parameter-entity references may only appear in the DTD.
7623  * NOTE: misleading but this is handled.
7624  */
7625 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7626 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7627 {
7628     const xmlChar *name;
7629     xmlEntityPtr entity = NULL;
7630     xmlParserInputPtr input;
7631 
7632     if (RAW != '%')
7633         return;
7634     NEXT;
7635     name = xmlParseName(ctxt);
7636     if (name == NULL) {
7637 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7638 		       "xmlParsePEReference: no name\n");
7639 	return;
7640     }
7641     if (RAW != ';') {
7642 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7643         return;
7644     }
7645 
7646     NEXT;
7647 
7648     /*
7649      * Increate the number of entity references parsed
7650      */
7651     ctxt->nbentities++;
7652 
7653     /*
7654      * Request the entity from SAX
7655      */
7656     if ((ctxt->sax != NULL) &&
7657 	(ctxt->sax->getParameterEntity != NULL))
7658 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7659 					       name);
7660     if (entity == NULL) {
7661 	/*
7662 	 * [ WFC: Entity Declared ]
7663 	 * In a document without any DTD, a document with only an
7664 	 * internal DTD subset which contains no parameter entity
7665 	 * references, or a document with "standalone='yes'", ...
7666 	 * ... The declaration of a parameter entity must precede
7667 	 * any reference to it...
7668 	 */
7669 	if ((ctxt->standalone == 1) ||
7670 	    ((ctxt->hasExternalSubset == 0) &&
7671 	     (ctxt->hasPErefs == 0))) {
7672 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673 			      "PEReference: %%%s; not found\n",
7674 			      name);
7675 	} else {
7676 	    /*
7677 	     * [ VC: Entity Declared ]
7678 	     * In a document with an external subset or external
7679 	     * parameter entities with "standalone='no'", ...
7680 	     * ... The declaration of a parameter entity must
7681 	     * precede any reference to it...
7682 	     */
7683 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 			  "PEReference: %%%s; not found\n",
7685 			  name, NULL);
7686 	    ctxt->valid = 0;
7687 	}
7688     } else {
7689 	/*
7690 	 * Internal checking in case the entity quest barfed
7691 	 */
7692 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7693 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7694 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 		  "Internal: %%%s; is not a parameter entity\n",
7696 			  name, NULL);
7697 	} else if (ctxt->input->free != deallocblankswrapper) {
7698 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7699 	    if (xmlPushInput(ctxt, input) < 0)
7700 		return;
7701 	} else {
7702 	    /*
7703 	     * TODO !!!
7704 	     * handle the extra spaces added before and after
7705 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
7706 	     */
7707 	    input = xmlNewEntityInputStream(ctxt, entity);
7708 	    if (xmlPushInput(ctxt, input) < 0)
7709 		return;
7710 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7711 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7712 		(IS_BLANK_CH(NXT(5)))) {
7713 		xmlParseTextDecl(ctxt);
7714 		if (ctxt->errNo ==
7715 		    XML_ERR_UNSUPPORTED_ENCODING) {
7716 		    /*
7717 		     * The XML REC instructs us to stop parsing
7718 		     * right here
7719 		     */
7720 		    ctxt->instate = XML_PARSER_EOF;
7721 		    return;
7722 		}
7723 	    }
7724 	}
7725     }
7726     ctxt->hasPErefs = 1;
7727 }
7728 
7729 /**
7730  * xmlLoadEntityContent:
7731  * @ctxt:  an XML parser context
7732  * @entity: an unloaded system entity
7733  *
7734  * Load the original content of the given system entity from the
7735  * ExternalID/SystemID given. This is to be used for Included in Literal
7736  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7737  *
7738  * Returns 0 in case of success and -1 in case of failure
7739  */
7740 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7741 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7742     xmlParserInputPtr input;
7743     xmlBufferPtr buf;
7744     int l, c;
7745     int count = 0;
7746 
7747     if ((ctxt == NULL) || (entity == NULL) ||
7748         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7749 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7750 	(entity->content != NULL)) {
7751 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 	            "xmlLoadEntityContent parameter error");
7753         return(-1);
7754     }
7755 
7756     if (xmlParserDebugEntities)
7757 	xmlGenericError(xmlGenericErrorContext,
7758 		"Reading %s entity content input\n", entity->name);
7759 
7760     buf = xmlBufferCreate();
7761     if (buf == NULL) {
7762 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7763 	            "xmlLoadEntityContent parameter error");
7764         return(-1);
7765     }
7766 
7767     input = xmlNewEntityInputStream(ctxt, entity);
7768     if (input == NULL) {
7769 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7770 	            "xmlLoadEntityContent input error");
7771 	xmlBufferFree(buf);
7772         return(-1);
7773     }
7774 
7775     /*
7776      * Push the entity as the current input, read char by char
7777      * saving to the buffer until the end of the entity or an error
7778      */
7779     if (xmlPushInput(ctxt, input) < 0) {
7780         xmlBufferFree(buf);
7781 	return(-1);
7782     }
7783 
7784     GROW;
7785     c = CUR_CHAR(l);
7786     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7787            (IS_CHAR(c))) {
7788         xmlBufferAdd(buf, ctxt->input->cur, l);
7789 	if (count++ > 100) {
7790 	    count = 0;
7791 	    GROW;
7792 	}
7793 	NEXTL(l);
7794 	c = CUR_CHAR(l);
7795     }
7796 
7797     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7798         xmlPopInput(ctxt);
7799     } else if (!IS_CHAR(c)) {
7800         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7801                           "xmlLoadEntityContent: invalid char value %d\n",
7802 	                  c);
7803 	xmlBufferFree(buf);
7804 	return(-1);
7805     }
7806     entity->content = buf->content;
7807     buf->content = NULL;
7808     xmlBufferFree(buf);
7809 
7810     return(0);
7811 }
7812 
7813 /**
7814  * xmlParseStringPEReference:
7815  * @ctxt:  an XML parser context
7816  * @str:  a pointer to an index in the string
7817  *
7818  * parse PEReference declarations
7819  *
7820  * [69] PEReference ::= '%' Name ';'
7821  *
7822  * [ WFC: No Recursion ]
7823  * A parsed entity must not contain a recursive
7824  * reference to itself, either directly or indirectly.
7825  *
7826  * [ WFC: Entity Declared ]
7827  * In a document without any DTD, a document with only an internal DTD
7828  * subset which contains no parameter entity references, or a document
7829  * with "standalone='yes'", ...  ... The declaration of a parameter
7830  * entity must precede any reference to it...
7831  *
7832  * [ VC: Entity Declared ]
7833  * In a document with an external subset or external parameter entities
7834  * with "standalone='no'", ...  ... The declaration of a parameter entity
7835  * must precede any reference to it...
7836  *
7837  * [ WFC: In DTD ]
7838  * Parameter-entity references may only appear in the DTD.
7839  * NOTE: misleading but this is handled.
7840  *
7841  * Returns the string of the entity content.
7842  *         str is updated to the current value of the index
7843  */
7844 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7845 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7846     const xmlChar *ptr;
7847     xmlChar cur;
7848     xmlChar *name;
7849     xmlEntityPtr entity = NULL;
7850 
7851     if ((str == NULL) || (*str == NULL)) return(NULL);
7852     ptr = *str;
7853     cur = *ptr;
7854     if (cur != '%')
7855         return(NULL);
7856     ptr++;
7857     name = xmlParseStringName(ctxt, &ptr);
7858     if (name == NULL) {
7859 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7860 		       "xmlParseStringPEReference: no name\n");
7861 	*str = ptr;
7862 	return(NULL);
7863     }
7864     cur = *ptr;
7865     if (cur != ';') {
7866 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7867 	xmlFree(name);
7868 	*str = ptr;
7869 	return(NULL);
7870     }
7871     ptr++;
7872 
7873     /*
7874      * Increate the number of entity references parsed
7875      */
7876     ctxt->nbentities++;
7877 
7878     /*
7879      * Request the entity from SAX
7880      */
7881     if ((ctxt->sax != NULL) &&
7882 	(ctxt->sax->getParameterEntity != NULL))
7883 	entity = ctxt->sax->getParameterEntity(ctxt->userData,
7884 					       name);
7885     if (entity == NULL) {
7886 	/*
7887 	 * [ WFC: Entity Declared ]
7888 	 * In a document without any DTD, a document with only an
7889 	 * internal DTD subset which contains no parameter entity
7890 	 * references, or a document with "standalone='yes'", ...
7891 	 * ... The declaration of a parameter entity must precede
7892 	 * any reference to it...
7893 	 */
7894 	if ((ctxt->standalone == 1) ||
7895 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7896 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7897 		 "PEReference: %%%s; not found\n", name);
7898 	} else {
7899 	    /*
7900 	     * [ VC: Entity Declared ]
7901 	     * In a document with an external subset or external
7902 	     * parameter entities with "standalone='no'", ...
7903 	     * ... The declaration of a parameter entity must
7904 	     * precede any reference to it...
7905 	     */
7906 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 			  "PEReference: %%%s; not found\n",
7908 			  name, NULL);
7909 	    ctxt->valid = 0;
7910 	}
7911     } else {
7912 	/*
7913 	 * Internal checking in case the entity quest barfed
7914 	 */
7915 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7916 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7917 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7918 			  "%%%s; is not a parameter entity\n",
7919 			  name, NULL);
7920 	}
7921     }
7922     ctxt->hasPErefs = 1;
7923     xmlFree(name);
7924     *str = ptr;
7925     return(entity);
7926 }
7927 
7928 /**
7929  * xmlParseDocTypeDecl:
7930  * @ctxt:  an XML parser context
7931  *
7932  * parse a DOCTYPE declaration
7933  *
7934  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7935  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7936  *
7937  * [ VC: Root Element Type ]
7938  * The Name in the document type declaration must match the element
7939  * type of the root element.
7940  */
7941 
7942 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7943 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7944     const xmlChar *name = NULL;
7945     xmlChar *ExternalID = NULL;
7946     xmlChar *URI = NULL;
7947 
7948     /*
7949      * We know that '<!DOCTYPE' has been detected.
7950      */
7951     SKIP(9);
7952 
7953     SKIP_BLANKS;
7954 
7955     /*
7956      * Parse the DOCTYPE name.
7957      */
7958     name = xmlParseName(ctxt);
7959     if (name == NULL) {
7960 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7961 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7962     }
7963     ctxt->intSubName = name;
7964 
7965     SKIP_BLANKS;
7966 
7967     /*
7968      * Check for SystemID and ExternalID
7969      */
7970     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7971 
7972     if ((URI != NULL) || (ExternalID != NULL)) {
7973         ctxt->hasExternalSubset = 1;
7974     }
7975     ctxt->extSubURI = URI;
7976     ctxt->extSubSystem = ExternalID;
7977 
7978     SKIP_BLANKS;
7979 
7980     /*
7981      * Create and update the internal subset.
7982      */
7983     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7984 	(!ctxt->disableSAX))
7985 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7986 
7987     /*
7988      * Is there any internal subset declarations ?
7989      * they are handled separately in xmlParseInternalSubset()
7990      */
7991     if (RAW == '[')
7992 	return;
7993 
7994     /*
7995      * We should be at the end of the DOCTYPE declaration.
7996      */
7997     if (RAW != '>') {
7998 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7999     }
8000     NEXT;
8001 }
8002 
8003 /**
8004  * xmlParseInternalSubset:
8005  * @ctxt:  an XML parser context
8006  *
8007  * parse the internal subset declaration
8008  *
8009  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8010  */
8011 
8012 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8013 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8014     /*
8015      * Is there any DTD definition ?
8016      */
8017     if (RAW == '[') {
8018         ctxt->instate = XML_PARSER_DTD;
8019         NEXT;
8020 	/*
8021 	 * Parse the succession of Markup declarations and
8022 	 * PEReferences.
8023 	 * Subsequence (markupdecl | PEReference | S)*
8024 	 */
8025 	while (RAW != ']') {
8026 	    const xmlChar *check = CUR_PTR;
8027 	    unsigned int cons = ctxt->input->consumed;
8028 
8029 	    SKIP_BLANKS;
8030 	    xmlParseMarkupDecl(ctxt);
8031 	    xmlParsePEReference(ctxt);
8032 
8033 	    /*
8034 	     * Pop-up of finished entities.
8035 	     */
8036 	    while ((RAW == 0) && (ctxt->inputNr > 1))
8037 		xmlPopInput(ctxt);
8038 
8039 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8040 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8041 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
8042 		break;
8043 	    }
8044 	}
8045 	if (RAW == ']') {
8046 	    NEXT;
8047 	    SKIP_BLANKS;
8048 	}
8049     }
8050 
8051     /*
8052      * We should be at the end of the DOCTYPE declaration.
8053      */
8054     if (RAW != '>') {
8055 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8056     }
8057     NEXT;
8058 }
8059 
8060 #ifdef LIBXML_SAX1_ENABLED
8061 /**
8062  * xmlParseAttribute:
8063  * @ctxt:  an XML parser context
8064  * @value:  a xmlChar ** used to store the value of the attribute
8065  *
8066  * parse an attribute
8067  *
8068  * [41] Attribute ::= Name Eq AttValue
8069  *
8070  * [ WFC: No External Entity References ]
8071  * Attribute values cannot contain direct or indirect entity references
8072  * to external entities.
8073  *
8074  * [ WFC: No < in Attribute Values ]
8075  * The replacement text of any entity referred to directly or indirectly in
8076  * an attribute value (other than "&lt;") must not contain a <.
8077  *
8078  * [ VC: Attribute Value Type ]
8079  * The attribute must have been declared; the value must be of the type
8080  * declared for it.
8081  *
8082  * [25] Eq ::= S? '=' S?
8083  *
8084  * With namespace:
8085  *
8086  * [NS 11] Attribute ::= QName Eq AttValue
8087  *
8088  * Also the case QName == xmlns:??? is handled independently as a namespace
8089  * definition.
8090  *
8091  * Returns the attribute name, and the value in *value.
8092  */
8093 
8094 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8095 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8096     const xmlChar *name;
8097     xmlChar *val;
8098 
8099     *value = NULL;
8100     GROW;
8101     name = xmlParseName(ctxt);
8102     if (name == NULL) {
8103 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8104 	               "error parsing attribute name\n");
8105         return(NULL);
8106     }
8107 
8108     /*
8109      * read the value
8110      */
8111     SKIP_BLANKS;
8112     if (RAW == '=') {
8113         NEXT;
8114 	SKIP_BLANKS;
8115 	val = xmlParseAttValue(ctxt);
8116 	ctxt->instate = XML_PARSER_CONTENT;
8117     } else {
8118 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8119 	       "Specification mandate value for attribute %s\n", name);
8120 	return(NULL);
8121     }
8122 
8123     /*
8124      * Check that xml:lang conforms to the specification
8125      * No more registered as an error, just generate a warning now
8126      * since this was deprecated in XML second edition
8127      */
8128     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8129 	if (!xmlCheckLanguageID(val)) {
8130 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8131 		          "Malformed value for xml:lang : %s\n",
8132 			  val, NULL);
8133 	}
8134     }
8135 
8136     /*
8137      * Check that xml:space conforms to the specification
8138      */
8139     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8140 	if (xmlStrEqual(val, BAD_CAST "default"))
8141 	    *(ctxt->space) = 0;
8142 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8143 	    *(ctxt->space) = 1;
8144 	else {
8145 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8146 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8147                                  val, NULL);
8148 	}
8149     }
8150 
8151     *value = val;
8152     return(name);
8153 }
8154 
8155 /**
8156  * xmlParseStartTag:
8157  * @ctxt:  an XML parser context
8158  *
8159  * parse a start of tag either for rule element or
8160  * EmptyElement. In both case we don't parse the tag closing chars.
8161  *
8162  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8163  *
8164  * [ WFC: Unique Att Spec ]
8165  * No attribute name may appear more than once in the same start-tag or
8166  * empty-element tag.
8167  *
8168  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8169  *
8170  * [ WFC: Unique Att Spec ]
8171  * No attribute name may appear more than once in the same start-tag or
8172  * empty-element tag.
8173  *
8174  * With namespace:
8175  *
8176  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8177  *
8178  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8179  *
8180  * Returns the element name parsed
8181  */
8182 
8183 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8184 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8185     const xmlChar *name;
8186     const xmlChar *attname;
8187     xmlChar *attvalue;
8188     const xmlChar **atts = ctxt->atts;
8189     int nbatts = 0;
8190     int maxatts = ctxt->maxatts;
8191     int i;
8192 
8193     if (RAW != '<') return(NULL);
8194     NEXT1;
8195 
8196     name = xmlParseName(ctxt);
8197     if (name == NULL) {
8198 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8199 	     "xmlParseStartTag: invalid element name\n");
8200         return(NULL);
8201     }
8202 
8203     /*
8204      * Now parse the attributes, it ends up with the ending
8205      *
8206      * (S Attribute)* S?
8207      */
8208     SKIP_BLANKS;
8209     GROW;
8210 
8211     while ((RAW != '>') &&
8212 	   ((RAW != '/') || (NXT(1) != '>')) &&
8213 	   (IS_BYTE_CHAR(RAW))) {
8214 	const xmlChar *q = CUR_PTR;
8215 	unsigned int cons = ctxt->input->consumed;
8216 
8217 	attname = xmlParseAttribute(ctxt, &attvalue);
8218         if ((attname != NULL) && (attvalue != NULL)) {
8219 	    /*
8220 	     * [ WFC: Unique Att Spec ]
8221 	     * No attribute name may appear more than once in the same
8222 	     * start-tag or empty-element tag.
8223 	     */
8224 	    for (i = 0; i < nbatts;i += 2) {
8225 	        if (xmlStrEqual(atts[i], attname)) {
8226 		    xmlErrAttributeDup(ctxt, NULL, attname);
8227 		    xmlFree(attvalue);
8228 		    goto failed;
8229 		}
8230 	    }
8231 	    /*
8232 	     * Add the pair to atts
8233 	     */
8234 	    if (atts == NULL) {
8235 	        maxatts = 22; /* allow for 10 attrs by default */
8236 	        atts = (const xmlChar **)
8237 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8238 		if (atts == NULL) {
8239 		    xmlErrMemory(ctxt, NULL);
8240 		    if (attvalue != NULL)
8241 			xmlFree(attvalue);
8242 		    goto failed;
8243 		}
8244 		ctxt->atts = atts;
8245 		ctxt->maxatts = maxatts;
8246 	    } else if (nbatts + 4 > maxatts) {
8247 	        const xmlChar **n;
8248 
8249 	        maxatts *= 2;
8250 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8251 					     maxatts * sizeof(const xmlChar *));
8252 		if (n == NULL) {
8253 		    xmlErrMemory(ctxt, NULL);
8254 		    if (attvalue != NULL)
8255 			xmlFree(attvalue);
8256 		    goto failed;
8257 		}
8258 		atts = n;
8259 		ctxt->atts = atts;
8260 		ctxt->maxatts = maxatts;
8261 	    }
8262 	    atts[nbatts++] = attname;
8263 	    atts[nbatts++] = attvalue;
8264 	    atts[nbatts] = NULL;
8265 	    atts[nbatts + 1] = NULL;
8266 	} else {
8267 	    if (attvalue != NULL)
8268 		xmlFree(attvalue);
8269 	}
8270 
8271 failed:
8272 
8273 	GROW
8274 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8275 	    break;
8276 	if (!IS_BLANK_CH(RAW)) {
8277 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8278 			   "attributes construct error\n");
8279 	}
8280 	SKIP_BLANKS;
8281         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8282             (attname == NULL) && (attvalue == NULL)) {
8283 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8284 			   "xmlParseStartTag: problem parsing attributes\n");
8285 	    break;
8286 	}
8287 	SHRINK;
8288         GROW;
8289     }
8290 
8291     /*
8292      * SAX: Start of Element !
8293      */
8294     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8295 	(!ctxt->disableSAX)) {
8296 	if (nbatts > 0)
8297 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8298 	else
8299 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8300     }
8301 
8302     if (atts != NULL) {
8303         /* Free only the content strings */
8304         for (i = 1;i < nbatts;i+=2)
8305 	    if (atts[i] != NULL)
8306 	       xmlFree((xmlChar *) atts[i]);
8307     }
8308     return(name);
8309 }
8310 
8311 /**
8312  * xmlParseEndTag1:
8313  * @ctxt:  an XML parser context
8314  * @line:  line of the start tag
8315  * @nsNr:  number of namespaces on the start tag
8316  *
8317  * parse an end of tag
8318  *
8319  * [42] ETag ::= '</' Name S? '>'
8320  *
8321  * With namespace
8322  *
8323  * [NS 9] ETag ::= '</' QName S? '>'
8324  */
8325 
8326 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8327 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8328     const xmlChar *name;
8329 
8330     GROW;
8331     if ((RAW != '<') || (NXT(1) != '/')) {
8332 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8333 		       "xmlParseEndTag: '</' not found\n");
8334 	return;
8335     }
8336     SKIP(2);
8337 
8338     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8339 
8340     /*
8341      * We should definitely be at the ending "S? '>'" part
8342      */
8343     GROW;
8344     SKIP_BLANKS;
8345     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8346 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8347     } else
8348 	NEXT1;
8349 
8350     /*
8351      * [ WFC: Element Type Match ]
8352      * The Name in an element's end-tag must match the element type in the
8353      * start-tag.
8354      *
8355      */
8356     if (name != (xmlChar*)1) {
8357         if (name == NULL) name = BAD_CAST "unparseable";
8358         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8359 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8360 		                ctxt->name, line, name);
8361     }
8362 
8363     /*
8364      * SAX: End of Tag
8365      */
8366     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8367 	(!ctxt->disableSAX))
8368         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8369 
8370     namePop(ctxt);
8371     spacePop(ctxt);
8372     return;
8373 }
8374 
8375 /**
8376  * xmlParseEndTag:
8377  * @ctxt:  an XML parser context
8378  *
8379  * parse an end of tag
8380  *
8381  * [42] ETag ::= '</' Name S? '>'
8382  *
8383  * With namespace
8384  *
8385  * [NS 9] ETag ::= '</' QName S? '>'
8386  */
8387 
8388 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8389 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8390     xmlParseEndTag1(ctxt, 0);
8391 }
8392 #endif /* LIBXML_SAX1_ENABLED */
8393 
8394 /************************************************************************
8395  *									*
8396  *		      SAX 2 specific operations				*
8397  *									*
8398  ************************************************************************/
8399 
8400 /*
8401  * xmlGetNamespace:
8402  * @ctxt:  an XML parser context
8403  * @prefix:  the prefix to lookup
8404  *
8405  * Lookup the namespace name for the @prefix (which ca be NULL)
8406  * The prefix must come from the @ctxt->dict dictionnary
8407  *
8408  * Returns the namespace name or NULL if not bound
8409  */
8410 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8411 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8412     int i;
8413 
8414     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8415     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8416         if (ctxt->nsTab[i] == prefix) {
8417 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8418 	        return(NULL);
8419 	    return(ctxt->nsTab[i + 1]);
8420 	}
8421     return(NULL);
8422 }
8423 
8424 /**
8425  * xmlParseQName:
8426  * @ctxt:  an XML parser context
8427  * @prefix:  pointer to store the prefix part
8428  *
8429  * parse an XML Namespace QName
8430  *
8431  * [6]  QName  ::= (Prefix ':')? LocalPart
8432  * [7]  Prefix  ::= NCName
8433  * [8]  LocalPart  ::= NCName
8434  *
8435  * Returns the Name parsed or NULL
8436  */
8437 
8438 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8439 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8440     const xmlChar *l, *p;
8441 
8442     GROW;
8443 
8444     l = xmlParseNCName(ctxt);
8445     if (l == NULL) {
8446         if (CUR == ':') {
8447 	    l = xmlParseName(ctxt);
8448 	    if (l != NULL) {
8449 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8450 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8451 		*prefix = NULL;
8452 		return(l);
8453 	    }
8454 	}
8455         return(NULL);
8456     }
8457     if (CUR == ':') {
8458         NEXT;
8459 	p = l;
8460 	l = xmlParseNCName(ctxt);
8461 	if (l == NULL) {
8462 	    xmlChar *tmp;
8463 
8464             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8465 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8466 	    l = xmlParseNmtoken(ctxt);
8467 	    if (l == NULL)
8468 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8469 	    else {
8470 		tmp = xmlBuildQName(l, p, NULL, 0);
8471 		xmlFree((char *)l);
8472 	    }
8473 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8474 	    if (tmp != NULL) xmlFree(tmp);
8475 	    *prefix = NULL;
8476 	    return(p);
8477 	}
8478 	if (CUR == ':') {
8479 	    xmlChar *tmp;
8480 
8481             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8482 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8483 	    NEXT;
8484 	    tmp = (xmlChar *) xmlParseName(ctxt);
8485 	    if (tmp != NULL) {
8486 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8487 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8488 		if (tmp != NULL) xmlFree(tmp);
8489 		*prefix = p;
8490 		return(l);
8491 	    }
8492 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8493 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8494 	    if (tmp != NULL) xmlFree(tmp);
8495 	    *prefix = p;
8496 	    return(l);
8497 	}
8498 	*prefix = p;
8499     } else
8500         *prefix = NULL;
8501     return(l);
8502 }
8503 
8504 /**
8505  * xmlParseQNameAndCompare:
8506  * @ctxt:  an XML parser context
8507  * @name:  the localname
8508  * @prefix:  the prefix, if any.
8509  *
8510  * parse an XML name and compares for match
8511  * (specialized for endtag parsing)
8512  *
8513  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8514  * and the name for mismatch
8515  */
8516 
8517 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8518 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8519                         xmlChar const *prefix) {
8520     const xmlChar *cmp;
8521     const xmlChar *in;
8522     const xmlChar *ret;
8523     const xmlChar *prefix2;
8524 
8525     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8526 
8527     GROW;
8528     in = ctxt->input->cur;
8529 
8530     cmp = prefix;
8531     while (*in != 0 && *in == *cmp) {
8532     	++in;
8533 	++cmp;
8534     }
8535     if ((*cmp == 0) && (*in == ':')) {
8536         in++;
8537 	cmp = name;
8538 	while (*in != 0 && *in == *cmp) {
8539 	    ++in;
8540 	    ++cmp;
8541 	}
8542 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8543 	    /* success */
8544 	    ctxt->input->cur = in;
8545 	    return((const xmlChar*) 1);
8546 	}
8547     }
8548     /*
8549      * all strings coms from the dictionary, equality can be done directly
8550      */
8551     ret = xmlParseQName (ctxt, &prefix2);
8552     if ((ret == name) && (prefix == prefix2))
8553 	return((const xmlChar*) 1);
8554     return ret;
8555 }
8556 
8557 /**
8558  * xmlParseAttValueInternal:
8559  * @ctxt:  an XML parser context
8560  * @len:  attribute len result
8561  * @alloc:  whether the attribute was reallocated as a new string
8562  * @normalize:  if 1 then further non-CDATA normalization must be done
8563  *
8564  * parse a value for an attribute.
8565  * NOTE: if no normalization is needed, the routine will return pointers
8566  *       directly from the data buffer.
8567  *
8568  * 3.3.3 Attribute-Value Normalization:
8569  * Before the value of an attribute is passed to the application or
8570  * checked for validity, the XML processor must normalize it as follows:
8571  * - a character reference is processed by appending the referenced
8572  *   character to the attribute value
8573  * - an entity reference is processed by recursively processing the
8574  *   replacement text of the entity
8575  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8576  *   appending #x20 to the normalized value, except that only a single
8577  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8578  *   parsed entity or the literal entity value of an internal parsed entity
8579  * - other characters are processed by appending them to the normalized value
8580  * If the declared value is not CDATA, then the XML processor must further
8581  * process the normalized attribute value by discarding any leading and
8582  * trailing space (#x20) characters, and by replacing sequences of space
8583  * (#x20) characters by a single space (#x20) character.
8584  * All attributes for which no declaration has been read should be treated
8585  * by a non-validating parser as if declared CDATA.
8586  *
8587  * Returns the AttValue parsed or NULL. The value has to be freed by the
8588  *     caller if it was copied, this can be detected by val[*len] == 0.
8589  */
8590 
8591 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8592 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8593                          int normalize)
8594 {
8595     xmlChar limit = 0;
8596     const xmlChar *in = NULL, *start, *end, *last;
8597     xmlChar *ret = NULL;
8598 
8599     GROW;
8600     in = (xmlChar *) CUR_PTR;
8601     if (*in != '"' && *in != '\'') {
8602         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8603         return (NULL);
8604     }
8605     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8606 
8607     /*
8608      * try to handle in this routine the most common case where no
8609      * allocation of a new string is required and where content is
8610      * pure ASCII.
8611      */
8612     limit = *in++;
8613     end = ctxt->input->end;
8614     start = in;
8615     if (in >= end) {
8616         const xmlChar *oldbase = ctxt->input->base;
8617 	GROW;
8618 	if (oldbase != ctxt->input->base) {
8619 	    long delta = ctxt->input->base - oldbase;
8620 	    start = start + delta;
8621 	    in = in + delta;
8622 	}
8623 	end = ctxt->input->end;
8624     }
8625     if (normalize) {
8626         /*
8627 	 * Skip any leading spaces
8628 	 */
8629 	while ((in < end) && (*in != limit) &&
8630 	       ((*in == 0x20) || (*in == 0x9) ||
8631 	        (*in == 0xA) || (*in == 0xD))) {
8632 	    in++;
8633 	    start = in;
8634 	    if (in >= end) {
8635 		const xmlChar *oldbase = ctxt->input->base;
8636 		GROW;
8637 		if (oldbase != ctxt->input->base) {
8638 		    long delta = ctxt->input->base - oldbase;
8639 		    start = start + delta;
8640 		    in = in + delta;
8641 		}
8642 		end = ctxt->input->end;
8643 	    }
8644 	}
8645 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8646 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8647 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8648 	    if (in >= end) {
8649 		const xmlChar *oldbase = ctxt->input->base;
8650 		GROW;
8651 		if (oldbase != ctxt->input->base) {
8652 		    long delta = ctxt->input->base - oldbase;
8653 		    start = start + delta;
8654 		    in = in + delta;
8655 		}
8656 		end = ctxt->input->end;
8657 	    }
8658 	}
8659 	last = in;
8660 	/*
8661 	 * skip the trailing blanks
8662 	 */
8663 	while ((last[-1] == 0x20) && (last > start)) last--;
8664 	while ((in < end) && (*in != limit) &&
8665 	       ((*in == 0x20) || (*in == 0x9) ||
8666 	        (*in == 0xA) || (*in == 0xD))) {
8667 	    in++;
8668 	    if (in >= end) {
8669 		const xmlChar *oldbase = ctxt->input->base;
8670 		GROW;
8671 		if (oldbase != ctxt->input->base) {
8672 		    long delta = ctxt->input->base - oldbase;
8673 		    start = start + delta;
8674 		    in = in + delta;
8675 		    last = last + delta;
8676 		}
8677 		end = ctxt->input->end;
8678 	    }
8679 	}
8680 	if (*in != limit) goto need_complex;
8681     } else {
8682 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8683 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8684 	    in++;
8685 	    if (in >= end) {
8686 		const xmlChar *oldbase = ctxt->input->base;
8687 		GROW;
8688 		if (oldbase != ctxt->input->base) {
8689 		    long delta = ctxt->input->base - oldbase;
8690 		    start = start + delta;
8691 		    in = in + delta;
8692 		}
8693 		end = ctxt->input->end;
8694 	    }
8695 	}
8696 	last = in;
8697 	if (*in != limit) goto need_complex;
8698     }
8699     in++;
8700     if (len != NULL) {
8701         *len = last - start;
8702         ret = (xmlChar *) start;
8703     } else {
8704         if (alloc) *alloc = 1;
8705         ret = xmlStrndup(start, last - start);
8706     }
8707     CUR_PTR = in;
8708     if (alloc) *alloc = 0;
8709     return ret;
8710 need_complex:
8711     if (alloc) *alloc = 1;
8712     return xmlParseAttValueComplex(ctxt, len, normalize);
8713 }
8714 
8715 /**
8716  * xmlParseAttribute2:
8717  * @ctxt:  an XML parser context
8718  * @pref:  the element prefix
8719  * @elem:  the element name
8720  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8721  * @value:  a xmlChar ** used to store the value of the attribute
8722  * @len:  an int * to save the length of the attribute
8723  * @alloc:  an int * to indicate if the attribute was allocated
8724  *
8725  * parse an attribute in the new SAX2 framework.
8726  *
8727  * Returns the attribute name, and the value in *value, .
8728  */
8729 
8730 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8731 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8732                    const xmlChar * pref, const xmlChar * elem,
8733                    const xmlChar ** prefix, xmlChar ** value,
8734                    int *len, int *alloc)
8735 {
8736     const xmlChar *name;
8737     xmlChar *val, *internal_val = NULL;
8738     int normalize = 0;
8739 
8740     *value = NULL;
8741     GROW;
8742     name = xmlParseQName(ctxt, prefix);
8743     if (name == NULL) {
8744         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8745                        "error parsing attribute name\n");
8746         return (NULL);
8747     }
8748 
8749     /*
8750      * get the type if needed
8751      */
8752     if (ctxt->attsSpecial != NULL) {
8753         int type;
8754 
8755         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8756                                             pref, elem, *prefix, name);
8757         if (type != 0)
8758             normalize = 1;
8759     }
8760 
8761     /*
8762      * read the value
8763      */
8764     SKIP_BLANKS;
8765     if (RAW == '=') {
8766         NEXT;
8767         SKIP_BLANKS;
8768         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8769 	if (normalize) {
8770 	    /*
8771 	     * Sometimes a second normalisation pass for spaces is needed
8772 	     * but that only happens if charrefs or entities refernces
8773 	     * have been used in the attribute value, i.e. the attribute
8774 	     * value have been extracted in an allocated string already.
8775 	     */
8776 	    if (*alloc) {
8777 	        const xmlChar *val2;
8778 
8779 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8780 		if ((val2 != NULL) && (val2 != val)) {
8781 		    xmlFree(val);
8782 		    val = (xmlChar *) val2;
8783 		}
8784 	    }
8785 	}
8786         ctxt->instate = XML_PARSER_CONTENT;
8787     } else {
8788         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8789                           "Specification mandate value for attribute %s\n",
8790                           name);
8791         return (NULL);
8792     }
8793 
8794     if (*prefix == ctxt->str_xml) {
8795         /*
8796          * Check that xml:lang conforms to the specification
8797          * No more registered as an error, just generate a warning now
8798          * since this was deprecated in XML second edition
8799          */
8800         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8801             internal_val = xmlStrndup(val, *len);
8802             if (!xmlCheckLanguageID(internal_val)) {
8803                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8804                               "Malformed value for xml:lang : %s\n",
8805                               internal_val, NULL);
8806             }
8807         }
8808 
8809         /*
8810          * Check that xml:space conforms to the specification
8811          */
8812         if (xmlStrEqual(name, BAD_CAST "space")) {
8813             internal_val = xmlStrndup(val, *len);
8814             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8815                 *(ctxt->space) = 0;
8816             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8817                 *(ctxt->space) = 1;
8818             else {
8819                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8820                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8821                               internal_val, NULL);
8822             }
8823         }
8824         if (internal_val) {
8825             xmlFree(internal_val);
8826         }
8827     }
8828 
8829     *value = val;
8830     return (name);
8831 }
8832 /**
8833  * xmlParseStartTag2:
8834  * @ctxt:  an XML parser context
8835  *
8836  * parse a start of tag either for rule element or
8837  * EmptyElement. In both case we don't parse the tag closing chars.
8838  * This routine is called when running SAX2 parsing
8839  *
8840  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8841  *
8842  * [ WFC: Unique Att Spec ]
8843  * No attribute name may appear more than once in the same start-tag or
8844  * empty-element tag.
8845  *
8846  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8847  *
8848  * [ WFC: Unique Att Spec ]
8849  * No attribute name may appear more than once in the same start-tag or
8850  * empty-element tag.
8851  *
8852  * With namespace:
8853  *
8854  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8855  *
8856  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8857  *
8858  * Returns the element name parsed
8859  */
8860 
8861 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8862 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8863                   const xmlChar **URI, int *tlen) {
8864     const xmlChar *localname;
8865     const xmlChar *prefix;
8866     const xmlChar *attname;
8867     const xmlChar *aprefix;
8868     const xmlChar *nsname;
8869     xmlChar *attvalue;
8870     const xmlChar **atts = ctxt->atts;
8871     int maxatts = ctxt->maxatts;
8872     int nratts, nbatts, nbdef;
8873     int i, j, nbNs, attval, oldline, oldcol;
8874     const xmlChar *base;
8875     unsigned long cur;
8876     int nsNr = ctxt->nsNr;
8877 
8878     if (RAW != '<') return(NULL);
8879     NEXT1;
8880 
8881     /*
8882      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8883      *       point since the attribute values may be stored as pointers to
8884      *       the buffer and calling SHRINK would destroy them !
8885      *       The Shrinking is only possible once the full set of attribute
8886      *       callbacks have been done.
8887      */
8888 reparse:
8889     SHRINK;
8890     base = ctxt->input->base;
8891     cur = ctxt->input->cur - ctxt->input->base;
8892     oldline = ctxt->input->line;
8893     oldcol = ctxt->input->col;
8894     nbatts = 0;
8895     nratts = 0;
8896     nbdef = 0;
8897     nbNs = 0;
8898     attval = 0;
8899     /* Forget any namespaces added during an earlier parse of this element. */
8900     ctxt->nsNr = nsNr;
8901 
8902     localname = xmlParseQName(ctxt, &prefix);
8903     if (localname == NULL) {
8904 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8905 		       "StartTag: invalid element name\n");
8906         return(NULL);
8907     }
8908     *tlen = ctxt->input->cur - ctxt->input->base - cur;
8909 
8910     /*
8911      * Now parse the attributes, it ends up with the ending
8912      *
8913      * (S Attribute)* S?
8914      */
8915     SKIP_BLANKS;
8916     GROW;
8917     if (ctxt->input->base != base) goto base_changed;
8918 
8919     while ((RAW != '>') &&
8920 	   ((RAW != '/') || (NXT(1) != '>')) &&
8921 	   (IS_BYTE_CHAR(RAW))) {
8922 	const xmlChar *q = CUR_PTR;
8923 	unsigned int cons = ctxt->input->consumed;
8924 	int len = -1, alloc = 0;
8925 
8926 	attname = xmlParseAttribute2(ctxt, prefix, localname,
8927 	                             &aprefix, &attvalue, &len, &alloc);
8928 	if (ctxt->input->base != base) {
8929 	    if ((attvalue != NULL) && (alloc != 0))
8930 	        xmlFree(attvalue);
8931 	    attvalue = NULL;
8932 	    goto base_changed;
8933 	}
8934         if ((attname != NULL) && (attvalue != NULL)) {
8935 	    if (len < 0) len = xmlStrlen(attvalue);
8936             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8937 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8938 		xmlURIPtr uri;
8939 
8940                 if (*URL != 0) {
8941 		    uri = xmlParseURI((const char *) URL);
8942 		    if (uri == NULL) {
8943 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8944 			         "xmlns: '%s' is not a valid URI\n",
8945 					   URL, NULL, NULL);
8946 		    } else {
8947 			if (uri->scheme == NULL) {
8948 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8949 				      "xmlns: URI %s is not absolute\n",
8950 				      URL, NULL, NULL);
8951 			}
8952 			xmlFreeURI(uri);
8953 		    }
8954 		    if (URL == ctxt->str_xml_ns) {
8955 			if (attname != ctxt->str_xml) {
8956 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8957 			 "xml namespace URI cannot be the default namespace\n",
8958 				     NULL, NULL, NULL);
8959 			}
8960 			goto skip_default_ns;
8961 		    }
8962 		    if ((len == 29) &&
8963 			(xmlStrEqual(URL,
8964 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8965 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8966 			     "reuse of the xmlns namespace name is forbidden\n",
8967 				 NULL, NULL, NULL);
8968 			goto skip_default_ns;
8969 		    }
8970 		}
8971 		/*
8972 		 * check that it's not a defined namespace
8973 		 */
8974 		for (j = 1;j <= nbNs;j++)
8975 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8976 			break;
8977 		if (j <= nbNs)
8978 		    xmlErrAttributeDup(ctxt, NULL, attname);
8979 		else
8980 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8981 skip_default_ns:
8982 		if (alloc != 0) xmlFree(attvalue);
8983 		SKIP_BLANKS;
8984 		continue;
8985 	    }
8986             if (aprefix == ctxt->str_xmlns) {
8987 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8988 		xmlURIPtr uri;
8989 
8990                 if (attname == ctxt->str_xml) {
8991 		    if (URL != ctxt->str_xml_ns) {
8992 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 			         "xml namespace prefix mapped to wrong URI\n",
8994 			         NULL, NULL, NULL);
8995 		    }
8996 		    /*
8997 		     * Do not keep a namespace definition node
8998 		     */
8999 		    goto skip_ns;
9000 		}
9001                 if (URL == ctxt->str_xml_ns) {
9002 		    if (attname != ctxt->str_xml) {
9003 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9004 			         "xml namespace URI mapped to wrong prefix\n",
9005 			         NULL, NULL, NULL);
9006 		    }
9007 		    goto skip_ns;
9008 		}
9009                 if (attname == ctxt->str_xmlns) {
9010 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9011 			     "redefinition of the xmlns prefix is forbidden\n",
9012 			     NULL, NULL, NULL);
9013 		    goto skip_ns;
9014 		}
9015 		if ((len == 29) &&
9016 		    (xmlStrEqual(URL,
9017 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9018 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9019 			     "reuse of the xmlns namespace name is forbidden\n",
9020 			     NULL, NULL, NULL);
9021 		    goto skip_ns;
9022 		}
9023 		if ((URL == NULL) || (URL[0] == 0)) {
9024 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9025 		             "xmlns:%s: Empty XML namespace is not allowed\n",
9026 			          attname, NULL, NULL);
9027 		    goto skip_ns;
9028 		} else {
9029 		    uri = xmlParseURI((const char *) URL);
9030 		    if (uri == NULL) {
9031 			xmlNsErr(ctxt, XML_WAR_NS_URI,
9032 			     "xmlns:%s: '%s' is not a valid URI\n",
9033 					   attname, URL, NULL);
9034 		    } else {
9035 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9036 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9037 				      "xmlns:%s: URI %s is not absolute\n",
9038 				      attname, URL, NULL);
9039 			}
9040 			xmlFreeURI(uri);
9041 		    }
9042 		}
9043 
9044 		/*
9045 		 * check that it's not a defined namespace
9046 		 */
9047 		for (j = 1;j <= nbNs;j++)
9048 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9049 			break;
9050 		if (j <= nbNs)
9051 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9052 		else
9053 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9054 skip_ns:
9055 		if (alloc != 0) xmlFree(attvalue);
9056 		SKIP_BLANKS;
9057 		if (ctxt->input->base != base) goto base_changed;
9058 		continue;
9059 	    }
9060 
9061 	    /*
9062 	     * Add the pair to atts
9063 	     */
9064 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9065 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9066 		    if (attvalue[len] == 0)
9067 			xmlFree(attvalue);
9068 		    goto failed;
9069 		}
9070 	        maxatts = ctxt->maxatts;
9071 		atts = ctxt->atts;
9072 	    }
9073 	    ctxt->attallocs[nratts++] = alloc;
9074 	    atts[nbatts++] = attname;
9075 	    atts[nbatts++] = aprefix;
9076 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9077 	    atts[nbatts++] = attvalue;
9078 	    attvalue += len;
9079 	    atts[nbatts++] = attvalue;
9080 	    /*
9081 	     * tag if some deallocation is needed
9082 	     */
9083 	    if (alloc != 0) attval = 1;
9084 	} else {
9085 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9086 		xmlFree(attvalue);
9087 	}
9088 
9089 failed:
9090 
9091 	GROW
9092 	if (ctxt->input->base != base) goto base_changed;
9093 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9094 	    break;
9095 	if (!IS_BLANK_CH(RAW)) {
9096 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9097 			   "attributes construct error\n");
9098 	    break;
9099 	}
9100 	SKIP_BLANKS;
9101         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9102             (attname == NULL) && (attvalue == NULL)) {
9103 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9104 	         "xmlParseStartTag: problem parsing attributes\n");
9105 	    break;
9106 	}
9107         GROW;
9108 	if (ctxt->input->base != base) goto base_changed;
9109     }
9110 
9111     /*
9112      * The attributes defaulting
9113      */
9114     if (ctxt->attsDefault != NULL) {
9115         xmlDefAttrsPtr defaults;
9116 
9117 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9118 	if (defaults != NULL) {
9119 	    for (i = 0;i < defaults->nbAttrs;i++) {
9120 	        attname = defaults->values[5 * i];
9121 		aprefix = defaults->values[5 * i + 1];
9122 
9123                 /*
9124 		 * special work for namespaces defaulted defs
9125 		 */
9126 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9127 		    /*
9128 		     * check that it's not a defined namespace
9129 		     */
9130 		    for (j = 1;j <= nbNs;j++)
9131 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9132 			    break;
9133 	            if (j <= nbNs) continue;
9134 
9135 		    nsname = xmlGetNamespace(ctxt, NULL);
9136 		    if (nsname != defaults->values[5 * i + 2]) {
9137 			if (nsPush(ctxt, NULL,
9138 			           defaults->values[5 * i + 2]) > 0)
9139 			    nbNs++;
9140 		    }
9141 		} else if (aprefix == ctxt->str_xmlns) {
9142 		    /*
9143 		     * check that it's not a defined namespace
9144 		     */
9145 		    for (j = 1;j <= nbNs;j++)
9146 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9147 			    break;
9148 	            if (j <= nbNs) continue;
9149 
9150 		    nsname = xmlGetNamespace(ctxt, attname);
9151 		    if (nsname != defaults->values[2]) {
9152 			if (nsPush(ctxt, attname,
9153 			           defaults->values[5 * i + 2]) > 0)
9154 			    nbNs++;
9155 		    }
9156 		} else {
9157 		    /*
9158 		     * check that it's not a defined attribute
9159 		     */
9160 		    for (j = 0;j < nbatts;j+=5) {
9161 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9162 			    break;
9163 		    }
9164 		    if (j < nbatts) continue;
9165 
9166 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9167 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9168 			    return(NULL);
9169 			}
9170 			maxatts = ctxt->maxatts;
9171 			atts = ctxt->atts;
9172 		    }
9173 		    atts[nbatts++] = attname;
9174 		    atts[nbatts++] = aprefix;
9175 		    if (aprefix == NULL)
9176 			atts[nbatts++] = NULL;
9177 		    else
9178 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9179 		    atts[nbatts++] = defaults->values[5 * i + 2];
9180 		    atts[nbatts++] = defaults->values[5 * i + 3];
9181 		    if ((ctxt->standalone == 1) &&
9182 		        (defaults->values[5 * i + 4] != NULL)) {
9183 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9184 	  "standalone: attribute %s on %s defaulted from external subset\n",
9185 	                                 attname, localname);
9186 		    }
9187 		    nbdef++;
9188 		}
9189 	    }
9190 	}
9191     }
9192 
9193     /*
9194      * The attributes checkings
9195      */
9196     for (i = 0; i < nbatts;i += 5) {
9197         /*
9198 	* The default namespace does not apply to attribute names.
9199 	*/
9200 	if (atts[i + 1] != NULL) {
9201 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9202 	    if (nsname == NULL) {
9203 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9204 		    "Namespace prefix %s for %s on %s is not defined\n",
9205 		    atts[i + 1], atts[i], localname);
9206 	    }
9207 	    atts[i + 2] = nsname;
9208 	} else
9209 	    nsname = NULL;
9210 	/*
9211 	 * [ WFC: Unique Att Spec ]
9212 	 * No attribute name may appear more than once in the same
9213 	 * start-tag or empty-element tag.
9214 	 * As extended by the Namespace in XML REC.
9215 	 */
9216         for (j = 0; j < i;j += 5) {
9217 	    if (atts[i] == atts[j]) {
9218 	        if (atts[i+1] == atts[j+1]) {
9219 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9220 		    break;
9221 		}
9222 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9223 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9224 			     "Namespaced Attribute %s in '%s' redefined\n",
9225 			     atts[i], nsname, NULL);
9226 		    break;
9227 		}
9228 	    }
9229 	}
9230     }
9231 
9232     nsname = xmlGetNamespace(ctxt, prefix);
9233     if ((prefix != NULL) && (nsname == NULL)) {
9234 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9235 	         "Namespace prefix %s on %s is not defined\n",
9236 		 prefix, localname, NULL);
9237     }
9238     *pref = prefix;
9239     *URI = nsname;
9240 
9241     /*
9242      * SAX: Start of Element !
9243      */
9244     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9245 	(!ctxt->disableSAX)) {
9246 	if (nbNs > 0)
9247 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9248 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9249 			  nbatts / 5, nbdef, atts);
9250 	else
9251 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9252 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9253     }
9254 
9255     /*
9256      * Free up attribute allocated strings if needed
9257      */
9258     if (attval != 0) {
9259 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9260 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261 	        xmlFree((xmlChar *) atts[i]);
9262     }
9263 
9264     return(localname);
9265 
9266 base_changed:
9267     /*
9268      * the attribute strings are valid iif the base didn't changed
9269      */
9270     if (attval != 0) {
9271 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9272 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9273 	        xmlFree((xmlChar *) atts[i]);
9274     }
9275     ctxt->input->cur = ctxt->input->base + cur;
9276     ctxt->input->line = oldline;
9277     ctxt->input->col = oldcol;
9278     if (ctxt->wellFormed == 1) {
9279 	goto reparse;
9280     }
9281     return(NULL);
9282 }
9283 
9284 /**
9285  * xmlParseEndTag2:
9286  * @ctxt:  an XML parser context
9287  * @line:  line of the start tag
9288  * @nsNr:  number of namespaces on the start tag
9289  *
9290  * parse an end of tag
9291  *
9292  * [42] ETag ::= '</' Name S? '>'
9293  *
9294  * With namespace
9295  *
9296  * [NS 9] ETag ::= '</' QName S? '>'
9297  */
9298 
9299 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9300 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9301                 const xmlChar *URI, int line, int nsNr, int tlen) {
9302     const xmlChar *name;
9303 
9304     GROW;
9305     if ((RAW != '<') || (NXT(1) != '/')) {
9306 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9307 	return;
9308     }
9309     SKIP(2);
9310 
9311     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9312         if (ctxt->input->cur[tlen] == '>') {
9313 	    ctxt->input->cur += tlen + 1;
9314 	    goto done;
9315 	}
9316 	ctxt->input->cur += tlen;
9317 	name = (xmlChar*)1;
9318     } else {
9319 	if (prefix == NULL)
9320 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9321 	else
9322 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9323     }
9324 
9325     /*
9326      * We should definitely be at the ending "S? '>'" part
9327      */
9328     GROW;
9329     SKIP_BLANKS;
9330     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9331 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9332     } else
9333 	NEXT1;
9334 
9335     /*
9336      * [ WFC: Element Type Match ]
9337      * The Name in an element's end-tag must match the element type in the
9338      * start-tag.
9339      *
9340      */
9341     if (name != (xmlChar*)1) {
9342         if (name == NULL) name = BAD_CAST "unparseable";
9343         if ((line == 0) && (ctxt->node != NULL))
9344             line = ctxt->node->line;
9345         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9346 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9347 		                ctxt->name, line, name);
9348     }
9349 
9350     /*
9351      * SAX: End of Tag
9352      */
9353 done:
9354     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9355 	(!ctxt->disableSAX))
9356 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9357 
9358     spacePop(ctxt);
9359     if (nsNr != 0)
9360 	nsPop(ctxt, nsNr);
9361     return;
9362 }
9363 
9364 /**
9365  * xmlParseCDSect:
9366  * @ctxt:  an XML parser context
9367  *
9368  * Parse escaped pure raw content.
9369  *
9370  * [18] CDSect ::= CDStart CData CDEnd
9371  *
9372  * [19] CDStart ::= '<![CDATA['
9373  *
9374  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9375  *
9376  * [21] CDEnd ::= ']]>'
9377  */
9378 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9379 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9380     xmlChar *buf = NULL;
9381     int len = 0;
9382     int size = XML_PARSER_BUFFER_SIZE;
9383     int r, rl;
9384     int	s, sl;
9385     int cur, l;
9386     int count = 0;
9387 
9388     /* Check 2.6.0 was NXT(0) not RAW */
9389     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9390 	SKIP(9);
9391     } else
9392         return;
9393 
9394     ctxt->instate = XML_PARSER_CDATA_SECTION;
9395     r = CUR_CHAR(rl);
9396     if (!IS_CHAR(r)) {
9397 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9398 	ctxt->instate = XML_PARSER_CONTENT;
9399         return;
9400     }
9401     NEXTL(rl);
9402     s = CUR_CHAR(sl);
9403     if (!IS_CHAR(s)) {
9404 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9405 	ctxt->instate = XML_PARSER_CONTENT;
9406         return;
9407     }
9408     NEXTL(sl);
9409     cur = CUR_CHAR(l);
9410     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9411     if (buf == NULL) {
9412 	xmlErrMemory(ctxt, NULL);
9413 	return;
9414     }
9415     while (IS_CHAR(cur) &&
9416            ((r != ']') || (s != ']') || (cur != '>'))) {
9417 	if (len + 5 >= size) {
9418 	    xmlChar *tmp;
9419 
9420 	    size *= 2;
9421 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9422 	    if (tmp == NULL) {
9423 	        xmlFree(buf);
9424 		xmlErrMemory(ctxt, NULL);
9425 		return;
9426 	    }
9427 	    buf = tmp;
9428 	}
9429 	COPY_BUF(rl,buf,len,r);
9430 	r = s;
9431 	rl = sl;
9432 	s = cur;
9433 	sl = l;
9434 	count++;
9435 	if (count > 50) {
9436 	    GROW;
9437 	    count = 0;
9438 	}
9439 	NEXTL(l);
9440 	cur = CUR_CHAR(l);
9441     }
9442     buf[len] = 0;
9443     ctxt->instate = XML_PARSER_CONTENT;
9444     if (cur != '>') {
9445 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9446 	                     "CData section not finished\n%.50s\n", buf);
9447 	xmlFree(buf);
9448         return;
9449     }
9450     NEXTL(l);
9451 
9452     /*
9453      * OK the buffer is to be consumed as cdata.
9454      */
9455     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9456 	if (ctxt->sax->cdataBlock != NULL)
9457 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9458 	else if (ctxt->sax->characters != NULL)
9459 	    ctxt->sax->characters(ctxt->userData, buf, len);
9460     }
9461     xmlFree(buf);
9462 }
9463 
9464 /**
9465  * xmlParseContent:
9466  * @ctxt:  an XML parser context
9467  *
9468  * Parse a content:
9469  *
9470  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9471  */
9472 
9473 void
xmlParseContent(xmlParserCtxtPtr ctxt)9474 xmlParseContent(xmlParserCtxtPtr ctxt) {
9475     GROW;
9476     while ((RAW != 0) &&
9477 	   ((RAW != '<') || (NXT(1) != '/')) &&
9478 	   (ctxt->instate != XML_PARSER_EOF)) {
9479 	const xmlChar *test = CUR_PTR;
9480 	unsigned int cons = ctxt->input->consumed;
9481 	const xmlChar *cur = ctxt->input->cur;
9482 
9483 	/*
9484 	 * First case : a Processing Instruction.
9485 	 */
9486 	if ((*cur == '<') && (cur[1] == '?')) {
9487 	    xmlParsePI(ctxt);
9488 	}
9489 
9490 	/*
9491 	 * Second case : a CDSection
9492 	 */
9493 	/* 2.6.0 test was *cur not RAW */
9494 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9495 	    xmlParseCDSect(ctxt);
9496 	}
9497 
9498 	/*
9499 	 * Third case :  a comment
9500 	 */
9501 	else if ((*cur == '<') && (NXT(1) == '!') &&
9502 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9503 	    xmlParseComment(ctxt);
9504 	    ctxt->instate = XML_PARSER_CONTENT;
9505 	}
9506 
9507 	/*
9508 	 * Fourth case :  a sub-element.
9509 	 */
9510 	else if (*cur == '<') {
9511 	    xmlParseElement(ctxt);
9512 	}
9513 
9514 	/*
9515 	 * Fifth case : a reference. If if has not been resolved,
9516 	 *    parsing returns it's Name, create the node
9517 	 */
9518 
9519 	else if (*cur == '&') {
9520 	    xmlParseReference(ctxt);
9521 	}
9522 
9523 	/*
9524 	 * Last case, text. Note that References are handled directly.
9525 	 */
9526 	else {
9527 	    xmlParseCharData(ctxt, 0);
9528 	}
9529 
9530 	GROW;
9531 	/*
9532 	 * Pop-up of finished entities.
9533 	 */
9534 	while ((RAW == 0) && (ctxt->inputNr > 1))
9535 	    xmlPopInput(ctxt);
9536 	SHRINK;
9537 
9538 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9539 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9540 	                "detected an error in element content\n");
9541 	    ctxt->instate = XML_PARSER_EOF;
9542             break;
9543 	}
9544     }
9545 }
9546 
9547 /**
9548  * xmlParseElement:
9549  * @ctxt:  an XML parser context
9550  *
9551  * parse an XML element, this is highly recursive
9552  *
9553  * [39] element ::= EmptyElemTag | STag content ETag
9554  *
9555  * [ WFC: Element Type Match ]
9556  * The Name in an element's end-tag must match the element type in the
9557  * start-tag.
9558  *
9559  */
9560 
9561 void
xmlParseElement(xmlParserCtxtPtr ctxt)9562 xmlParseElement(xmlParserCtxtPtr ctxt) {
9563     const xmlChar *name;
9564     const xmlChar *prefix = NULL;
9565     const xmlChar *URI = NULL;
9566     xmlParserNodeInfo node_info;
9567     int line, tlen;
9568     xmlNodePtr ret;
9569     int nsNr = ctxt->nsNr;
9570 
9571     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9572         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9573 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9574 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9575 			  xmlParserMaxDepth);
9576 	ctxt->instate = XML_PARSER_EOF;
9577 	return;
9578     }
9579 
9580     /* Capture start position */
9581     if (ctxt->record_info) {
9582         node_info.begin_pos = ctxt->input->consumed +
9583                           (CUR_PTR - ctxt->input->base);
9584 	node_info.begin_line = ctxt->input->line;
9585     }
9586 
9587     if (ctxt->spaceNr == 0)
9588 	spacePush(ctxt, -1);
9589     else if (*ctxt->space == -2)
9590 	spacePush(ctxt, -1);
9591     else
9592 	spacePush(ctxt, *ctxt->space);
9593 
9594     line = ctxt->input->line;
9595 #ifdef LIBXML_SAX1_ENABLED
9596     if (ctxt->sax2)
9597 #endif /* LIBXML_SAX1_ENABLED */
9598         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9599 #ifdef LIBXML_SAX1_ENABLED
9600     else
9601 	name = xmlParseStartTag(ctxt);
9602 #endif /* LIBXML_SAX1_ENABLED */
9603     if (ctxt->instate == XML_PARSER_EOF)
9604 	return;
9605     if (name == NULL) {
9606 	spacePop(ctxt);
9607         return;
9608     }
9609     namePush(ctxt, name);
9610     ret = ctxt->node;
9611 
9612 #ifdef LIBXML_VALID_ENABLED
9613     /*
9614      * [ VC: Root Element Type ]
9615      * The Name in the document type declaration must match the element
9616      * type of the root element.
9617      */
9618     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9619         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9620         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9621 #endif /* LIBXML_VALID_ENABLED */
9622 
9623     /*
9624      * Check for an Empty Element.
9625      */
9626     if ((RAW == '/') && (NXT(1) == '>')) {
9627         SKIP(2);
9628 	if (ctxt->sax2) {
9629 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9630 		(!ctxt->disableSAX))
9631 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9632 #ifdef LIBXML_SAX1_ENABLED
9633 	} else {
9634 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9635 		(!ctxt->disableSAX))
9636 		ctxt->sax->endElement(ctxt->userData, name);
9637 #endif /* LIBXML_SAX1_ENABLED */
9638 	}
9639 	namePop(ctxt);
9640 	spacePop(ctxt);
9641 	if (nsNr != ctxt->nsNr)
9642 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9643 	if ( ret != NULL && ctxt->record_info ) {
9644 	   node_info.end_pos = ctxt->input->consumed +
9645 			      (CUR_PTR - ctxt->input->base);
9646 	   node_info.end_line = ctxt->input->line;
9647 	   node_info.node = ret;
9648 	   xmlParserAddNodeInfo(ctxt, &node_info);
9649 	}
9650 	return;
9651     }
9652     if (RAW == '>') {
9653         NEXT1;
9654     } else {
9655         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9656 		     "Couldn't find end of Start Tag %s line %d\n",
9657 		                name, line, NULL);
9658 
9659 	/*
9660 	 * end of parsing of this node.
9661 	 */
9662 	nodePop(ctxt);
9663 	namePop(ctxt);
9664 	spacePop(ctxt);
9665 	if (nsNr != ctxt->nsNr)
9666 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9667 
9668 	/*
9669 	 * Capture end position and add node
9670 	 */
9671 	if ( ret != NULL && ctxt->record_info ) {
9672 	   node_info.end_pos = ctxt->input->consumed +
9673 			      (CUR_PTR - ctxt->input->base);
9674 	   node_info.end_line = ctxt->input->line;
9675 	   node_info.node = ret;
9676 	   xmlParserAddNodeInfo(ctxt, &node_info);
9677 	}
9678 	return;
9679     }
9680 
9681     /*
9682      * Parse the content of the element:
9683      */
9684     xmlParseContent(ctxt);
9685     if (!IS_BYTE_CHAR(RAW)) {
9686         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9687 	 "Premature end of data in tag %s line %d\n",
9688 		                name, line, NULL);
9689 
9690 	/*
9691 	 * end of parsing of this node.
9692 	 */
9693 	nodePop(ctxt);
9694 	namePop(ctxt);
9695 	spacePop(ctxt);
9696 	if (nsNr != ctxt->nsNr)
9697 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9698 	return;
9699     }
9700 
9701     /*
9702      * parse the end of tag: '</' should be here.
9703      */
9704     if (ctxt->sax2) {
9705 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9706 	namePop(ctxt);
9707     }
9708 #ifdef LIBXML_SAX1_ENABLED
9709       else
9710 	xmlParseEndTag1(ctxt, line);
9711 #endif /* LIBXML_SAX1_ENABLED */
9712 
9713     /*
9714      * Capture end position and add node
9715      */
9716     if ( ret != NULL && ctxt->record_info ) {
9717        node_info.end_pos = ctxt->input->consumed +
9718                           (CUR_PTR - ctxt->input->base);
9719        node_info.end_line = ctxt->input->line;
9720        node_info.node = ret;
9721        xmlParserAddNodeInfo(ctxt, &node_info);
9722     }
9723 }
9724 
9725 /**
9726  * xmlParseVersionNum:
9727  * @ctxt:  an XML parser context
9728  *
9729  * parse the XML version value.
9730  *
9731  * [26] VersionNum ::= '1.' [0-9]+
9732  *
9733  * In practice allow [0-9].[0-9]+ at that level
9734  *
9735  * Returns the string giving the XML version number, or NULL
9736  */
9737 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9738 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9739     xmlChar *buf = NULL;
9740     int len = 0;
9741     int size = 10;
9742     xmlChar cur;
9743 
9744     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9745     if (buf == NULL) {
9746 	xmlErrMemory(ctxt, NULL);
9747 	return(NULL);
9748     }
9749     cur = CUR;
9750     if (!((cur >= '0') && (cur <= '9'))) {
9751 	xmlFree(buf);
9752 	return(NULL);
9753     }
9754     buf[len++] = cur;
9755     NEXT;
9756     cur=CUR;
9757     if (cur != '.') {
9758 	xmlFree(buf);
9759 	return(NULL);
9760     }
9761     buf[len++] = cur;
9762     NEXT;
9763     cur=CUR;
9764     while ((cur >= '0') && (cur <= '9')) {
9765 	if (len + 1 >= size) {
9766 	    xmlChar *tmp;
9767 
9768 	    size *= 2;
9769 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9770 	    if (tmp == NULL) {
9771 	        xmlFree(buf);
9772 		xmlErrMemory(ctxt, NULL);
9773 		return(NULL);
9774 	    }
9775 	    buf = tmp;
9776 	}
9777 	buf[len++] = cur;
9778 	NEXT;
9779 	cur=CUR;
9780     }
9781     buf[len] = 0;
9782     return(buf);
9783 }
9784 
9785 /**
9786  * xmlParseVersionInfo:
9787  * @ctxt:  an XML parser context
9788  *
9789  * parse the XML version.
9790  *
9791  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9792  *
9793  * [25] Eq ::= S? '=' S?
9794  *
9795  * Returns the version string, e.g. "1.0"
9796  */
9797 
9798 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9799 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9800     xmlChar *version = NULL;
9801 
9802     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9803 	SKIP(7);
9804 	SKIP_BLANKS;
9805 	if (RAW != '=') {
9806 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9807 	    return(NULL);
9808         }
9809 	NEXT;
9810 	SKIP_BLANKS;
9811 	if (RAW == '"') {
9812 	    NEXT;
9813 	    version = xmlParseVersionNum(ctxt);
9814 	    if (RAW != '"') {
9815 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9816 	    } else
9817 	        NEXT;
9818 	} else if (RAW == '\''){
9819 	    NEXT;
9820 	    version = xmlParseVersionNum(ctxt);
9821 	    if (RAW != '\'') {
9822 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9823 	    } else
9824 	        NEXT;
9825 	} else {
9826 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9827 	}
9828     }
9829     return(version);
9830 }
9831 
9832 /**
9833  * xmlParseEncName:
9834  * @ctxt:  an XML parser context
9835  *
9836  * parse the XML encoding name
9837  *
9838  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9839  *
9840  * Returns the encoding name value or NULL
9841  */
9842 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9843 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9844     xmlChar *buf = NULL;
9845     int len = 0;
9846     int size = 10;
9847     xmlChar cur;
9848 
9849     cur = CUR;
9850     if (((cur >= 'a') && (cur <= 'z')) ||
9851         ((cur >= 'A') && (cur <= 'Z'))) {
9852 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9853 	if (buf == NULL) {
9854 	    xmlErrMemory(ctxt, NULL);
9855 	    return(NULL);
9856 	}
9857 
9858 	buf[len++] = cur;
9859 	NEXT;
9860 	cur = CUR;
9861 	while (((cur >= 'a') && (cur <= 'z')) ||
9862 	       ((cur >= 'A') && (cur <= 'Z')) ||
9863 	       ((cur >= '0') && (cur <= '9')) ||
9864 	       (cur == '.') || (cur == '_') ||
9865 	       (cur == '-')) {
9866 	    if (len + 1 >= size) {
9867 	        xmlChar *tmp;
9868 
9869 		size *= 2;
9870 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9871 		if (tmp == NULL) {
9872 		    xmlErrMemory(ctxt, NULL);
9873 		    xmlFree(buf);
9874 		    return(NULL);
9875 		}
9876 		buf = tmp;
9877 	    }
9878 	    buf[len++] = cur;
9879 	    NEXT;
9880 	    cur = CUR;
9881 	    if (cur == 0) {
9882 	        SHRINK;
9883 		GROW;
9884 		cur = CUR;
9885 	    }
9886         }
9887 	buf[len] = 0;
9888     } else {
9889 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9890     }
9891     return(buf);
9892 }
9893 
9894 /**
9895  * xmlParseEncodingDecl:
9896  * @ctxt:  an XML parser context
9897  *
9898  * parse the XML encoding declaration
9899  *
9900  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
9901  *
9902  * this setups the conversion filters.
9903  *
9904  * Returns the encoding value or NULL
9905  */
9906 
9907 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9908 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9909     xmlChar *encoding = NULL;
9910 
9911     SKIP_BLANKS;
9912     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9913 	SKIP(8);
9914 	SKIP_BLANKS;
9915 	if (RAW != '=') {
9916 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9917 	    return(NULL);
9918         }
9919 	NEXT;
9920 	SKIP_BLANKS;
9921 	if (RAW == '"') {
9922 	    NEXT;
9923 	    encoding = xmlParseEncName(ctxt);
9924 	    if (RAW != '"') {
9925 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9926 	    } else
9927 	        NEXT;
9928 	} else if (RAW == '\''){
9929 	    NEXT;
9930 	    encoding = xmlParseEncName(ctxt);
9931 	    if (RAW != '\'') {
9932 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9933 	    } else
9934 	        NEXT;
9935 	} else {
9936 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9937 	}
9938 
9939         /*
9940          * Non standard parsing, allowing the user to ignore encoding
9941          */
9942         if (ctxt->options & XML_PARSE_IGNORE_ENC)
9943             return(encoding);
9944 
9945 	/*
9946 	 * UTF-16 encoding stwich has already taken place at this stage,
9947 	 * more over the little-endian/big-endian selection is already done
9948 	 */
9949         if ((encoding != NULL) &&
9950 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9951 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9952 	    /*
9953 	     * If no encoding was passed to the parser, that we are
9954 	     * using UTF-16 and no decoder is present i.e. the
9955 	     * document is apparently UTF-8 compatible, then raise an
9956 	     * encoding mismatch fatal error
9957 	     */
9958 	    if ((ctxt->encoding == NULL) &&
9959 	        (ctxt->input->buf != NULL) &&
9960 	        (ctxt->input->buf->encoder == NULL)) {
9961 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9962 		  "Document labelled UTF-16 but has UTF-8 content\n");
9963 	    }
9964 	    if (ctxt->encoding != NULL)
9965 		xmlFree((xmlChar *) ctxt->encoding);
9966 	    ctxt->encoding = encoding;
9967 	}
9968 	/*
9969 	 * UTF-8 encoding is handled natively
9970 	 */
9971         else if ((encoding != NULL) &&
9972 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9973 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9974 	    if (ctxt->encoding != NULL)
9975 		xmlFree((xmlChar *) ctxt->encoding);
9976 	    ctxt->encoding = encoding;
9977 	}
9978 	else if (encoding != NULL) {
9979 	    xmlCharEncodingHandlerPtr handler;
9980 
9981 	    if (ctxt->input->encoding != NULL)
9982 		xmlFree((xmlChar *) ctxt->input->encoding);
9983 	    ctxt->input->encoding = encoding;
9984 
9985             handler = xmlFindCharEncodingHandler((const char *) encoding);
9986 	    if (handler != NULL) {
9987 		xmlSwitchToEncoding(ctxt, handler);
9988 	    } else {
9989 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9990 			"Unsupported encoding %s\n", encoding);
9991 		return(NULL);
9992 	    }
9993 	}
9994     }
9995     return(encoding);
9996 }
9997 
9998 /**
9999  * xmlParseSDDecl:
10000  * @ctxt:  an XML parser context
10001  *
10002  * parse the XML standalone declaration
10003  *
10004  * [32] SDDecl ::= S 'standalone' Eq
10005  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10006  *
10007  * [ VC: Standalone Document Declaration ]
10008  * TODO The standalone document declaration must have the value "no"
10009  * if any external markup declarations contain declarations of:
10010  *  - attributes with default values, if elements to which these
10011  *    attributes apply appear in the document without specifications
10012  *    of values for these attributes, or
10013  *  - entities (other than amp, lt, gt, apos, quot), if references
10014  *    to those entities appear in the document, or
10015  *  - attributes with values subject to normalization, where the
10016  *    attribute appears in the document with a value which will change
10017  *    as a result of normalization, or
10018  *  - element types with element content, if white space occurs directly
10019  *    within any instance of those types.
10020  *
10021  * Returns:
10022  *   1 if standalone="yes"
10023  *   0 if standalone="no"
10024  *  -2 if standalone attribute is missing or invalid
10025  *	  (A standalone value of -2 means that the XML declaration was found,
10026  *	   but no value was specified for the standalone attribute).
10027  */
10028 
10029 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10030 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10031     int standalone = -2;
10032 
10033     SKIP_BLANKS;
10034     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10035 	SKIP(10);
10036         SKIP_BLANKS;
10037 	if (RAW != '=') {
10038 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10039 	    return(standalone);
10040         }
10041 	NEXT;
10042 	SKIP_BLANKS;
10043         if (RAW == '\''){
10044 	    NEXT;
10045 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10046 	        standalone = 0;
10047                 SKIP(2);
10048 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10049 	               (NXT(2) == 's')) {
10050 	        standalone = 1;
10051 		SKIP(3);
10052             } else {
10053 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10054 	    }
10055 	    if (RAW != '\'') {
10056 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10057 	    } else
10058 	        NEXT;
10059 	} else if (RAW == '"'){
10060 	    NEXT;
10061 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10062 	        standalone = 0;
10063 		SKIP(2);
10064 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10065 	               (NXT(2) == 's')) {
10066 	        standalone = 1;
10067                 SKIP(3);
10068             } else {
10069 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10070 	    }
10071 	    if (RAW != '"') {
10072 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10073 	    } else
10074 	        NEXT;
10075 	} else {
10076 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10077         }
10078     }
10079     return(standalone);
10080 }
10081 
10082 /**
10083  * xmlParseXMLDecl:
10084  * @ctxt:  an XML parser context
10085  *
10086  * parse an XML declaration header
10087  *
10088  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10089  */
10090 
10091 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10092 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10093     xmlChar *version;
10094 
10095     /*
10096      * This value for standalone indicates that the document has an
10097      * XML declaration but it does not have a standalone attribute.
10098      * It will be overwritten later if a standalone attribute is found.
10099      */
10100     ctxt->input->standalone = -2;
10101 
10102     /*
10103      * We know that '<?xml' is here.
10104      */
10105     SKIP(5);
10106 
10107     if (!IS_BLANK_CH(RAW)) {
10108 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10109 	               "Blank needed after '<?xml'\n");
10110     }
10111     SKIP_BLANKS;
10112 
10113     /*
10114      * We must have the VersionInfo here.
10115      */
10116     version = xmlParseVersionInfo(ctxt);
10117     if (version == NULL) {
10118 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10119     } else {
10120 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10121 	    /*
10122 	     * Changed here for XML-1.0 5th edition
10123 	     */
10124 	    if (ctxt->options & XML_PARSE_OLD10) {
10125 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10126 			          "Unsupported version '%s'\n",
10127 			          version);
10128 	    } else {
10129 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10130 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10131 		                  "Unsupported version '%s'\n",
10132 				  version, NULL);
10133 		} else {
10134 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10135 				      "Unsupported version '%s'\n",
10136 				      version);
10137 		}
10138 	    }
10139 	}
10140 	if (ctxt->version != NULL)
10141 	    xmlFree((void *) ctxt->version);
10142 	ctxt->version = version;
10143     }
10144 
10145     /*
10146      * We may have the encoding declaration
10147      */
10148     if (!IS_BLANK_CH(RAW)) {
10149         if ((RAW == '?') && (NXT(1) == '>')) {
10150 	    SKIP(2);
10151 	    return;
10152 	}
10153 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10154     }
10155     xmlParseEncodingDecl(ctxt);
10156     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10157 	/*
10158 	 * The XML REC instructs us to stop parsing right here
10159 	 */
10160         return;
10161     }
10162 
10163     /*
10164      * We may have the standalone status.
10165      */
10166     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10167         if ((RAW == '?') && (NXT(1) == '>')) {
10168 	    SKIP(2);
10169 	    return;
10170 	}
10171 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10172     }
10173 
10174     /*
10175      * We can grow the input buffer freely at that point
10176      */
10177     GROW;
10178 
10179     SKIP_BLANKS;
10180     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10181 
10182     SKIP_BLANKS;
10183     if ((RAW == '?') && (NXT(1) == '>')) {
10184         SKIP(2);
10185     } else if (RAW == '>') {
10186         /* Deprecated old WD ... */
10187 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10188 	NEXT;
10189     } else {
10190 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10191 	MOVETO_ENDTAG(CUR_PTR);
10192 	NEXT;
10193     }
10194 }
10195 
10196 /**
10197  * xmlParseMisc:
10198  * @ctxt:  an XML parser context
10199  *
10200  * parse an XML Misc* optional field.
10201  *
10202  * [27] Misc ::= Comment | PI |  S
10203  */
10204 
10205 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10206 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10207     while (((RAW == '<') && (NXT(1) == '?')) ||
10208            (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10209            IS_BLANK_CH(CUR)) {
10210         if ((RAW == '<') && (NXT(1) == '?')) {
10211 	    xmlParsePI(ctxt);
10212 	} else if (IS_BLANK_CH(CUR)) {
10213 	    NEXT;
10214 	} else
10215 	    xmlParseComment(ctxt);
10216     }
10217 }
10218 
10219 /**
10220  * xmlParseDocument:
10221  * @ctxt:  an XML parser context
10222  *
10223  * parse an XML document (and build a tree if using the standard SAX
10224  * interface).
10225  *
10226  * [1] document ::= prolog element Misc*
10227  *
10228  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10229  *
10230  * Returns 0, -1 in case of error. the parser context is augmented
10231  *                as a result of the parsing.
10232  */
10233 
10234 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10235 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10236     xmlChar start[4];
10237     xmlCharEncoding enc;
10238 
10239     xmlInitParser();
10240 
10241     if ((ctxt == NULL) || (ctxt->input == NULL))
10242         return(-1);
10243 
10244     GROW;
10245 
10246     /*
10247      * SAX: detecting the level.
10248      */
10249     xmlDetectSAX2(ctxt);
10250 
10251     /*
10252      * SAX: beginning of the document processing.
10253      */
10254     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10255         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10256 
10257     if ((ctxt->encoding == NULL) &&
10258         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10259 	/*
10260 	 * Get the 4 first bytes and decode the charset
10261 	 * if enc != XML_CHAR_ENCODING_NONE
10262 	 * plug some encoding conversion routines.
10263 	 */
10264 	start[0] = RAW;
10265 	start[1] = NXT(1);
10266 	start[2] = NXT(2);
10267 	start[3] = NXT(3);
10268 	enc = xmlDetectCharEncoding(&start[0], 4);
10269 	if (enc != XML_CHAR_ENCODING_NONE) {
10270 	    xmlSwitchEncoding(ctxt, enc);
10271 	}
10272     }
10273 
10274 
10275     if (CUR == 0) {
10276 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10277     }
10278 
10279     /*
10280      * Check for the XMLDecl in the Prolog.
10281      * do not GROW here to avoid the detected encoder to decode more
10282      * than just the first line, unless the amount of data is really
10283      * too small to hold "<?xml version="1.0" encoding="foo"
10284      */
10285     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10286        GROW;
10287     }
10288     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10289 
10290 	/*
10291 	 * Note that we will switch encoding on the fly.
10292 	 */
10293 	xmlParseXMLDecl(ctxt);
10294 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10295 	    /*
10296 	     * The XML REC instructs us to stop parsing right here
10297 	     */
10298 	    return(-1);
10299 	}
10300 	ctxt->standalone = ctxt->input->standalone;
10301 	SKIP_BLANKS;
10302     } else {
10303 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10304     }
10305     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10306         ctxt->sax->startDocument(ctxt->userData);
10307 
10308     /*
10309      * The Misc part of the Prolog
10310      */
10311     GROW;
10312     xmlParseMisc(ctxt);
10313 
10314     /*
10315      * Then possibly doc type declaration(s) and more Misc
10316      * (doctypedecl Misc*)?
10317      */
10318     GROW;
10319     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10320 
10321 	ctxt->inSubset = 1;
10322 	xmlParseDocTypeDecl(ctxt);
10323 	if (RAW == '[') {
10324 	    ctxt->instate = XML_PARSER_DTD;
10325 	    xmlParseInternalSubset(ctxt);
10326 	}
10327 
10328 	/*
10329 	 * Create and update the external subset.
10330 	 */
10331 	ctxt->inSubset = 2;
10332 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10333 	    (!ctxt->disableSAX))
10334 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10335 	                              ctxt->extSubSystem, ctxt->extSubURI);
10336 	ctxt->inSubset = 0;
10337 
10338         xmlCleanSpecialAttr(ctxt);
10339 
10340 	ctxt->instate = XML_PARSER_PROLOG;
10341 	xmlParseMisc(ctxt);
10342     }
10343 
10344     /*
10345      * Time to start parsing the tree itself
10346      */
10347     GROW;
10348     if (RAW != '<') {
10349 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10350 		       "Start tag expected, '<' not found\n");
10351     } else {
10352 	ctxt->instate = XML_PARSER_CONTENT;
10353 	xmlParseElement(ctxt);
10354 	ctxt->instate = XML_PARSER_EPILOG;
10355 
10356 
10357 	/*
10358 	 * The Misc part at the end
10359 	 */
10360 	xmlParseMisc(ctxt);
10361 
10362 	if (RAW != 0) {
10363 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10364 	}
10365 	ctxt->instate = XML_PARSER_EOF;
10366     }
10367 
10368     /*
10369      * SAX: end of the document processing.
10370      */
10371     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10372         ctxt->sax->endDocument(ctxt->userData);
10373 
10374     /*
10375      * Remove locally kept entity definitions if the tree was not built
10376      */
10377     if ((ctxt->myDoc != NULL) &&
10378 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10379 	xmlFreeDoc(ctxt->myDoc);
10380 	ctxt->myDoc = NULL;
10381     }
10382 
10383     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10384         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10385 	if (ctxt->valid)
10386 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10387 	if (ctxt->nsWellFormed)
10388 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10389 	if (ctxt->options & XML_PARSE_OLD10)
10390 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10391     }
10392     if (! ctxt->wellFormed) {
10393 	ctxt->valid = 0;
10394 	return(-1);
10395     }
10396     return(0);
10397 }
10398 
10399 /**
10400  * xmlParseExtParsedEnt:
10401  * @ctxt:  an XML parser context
10402  *
10403  * parse a general parsed entity
10404  * An external general parsed entity is well-formed if it matches the
10405  * production labeled extParsedEnt.
10406  *
10407  * [78] extParsedEnt ::= TextDecl? content
10408  *
10409  * Returns 0, -1 in case of error. the parser context is augmented
10410  *                as a result of the parsing.
10411  */
10412 
10413 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10414 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10415     xmlChar start[4];
10416     xmlCharEncoding enc;
10417 
10418     if ((ctxt == NULL) || (ctxt->input == NULL))
10419         return(-1);
10420 
10421     xmlDefaultSAXHandlerInit();
10422 
10423     xmlDetectSAX2(ctxt);
10424 
10425     GROW;
10426 
10427     /*
10428      * SAX: beginning of the document processing.
10429      */
10430     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10431         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10432 
10433     /*
10434      * Get the 4 first bytes and decode the charset
10435      * if enc != XML_CHAR_ENCODING_NONE
10436      * plug some encoding conversion routines.
10437      */
10438     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10439 	start[0] = RAW;
10440 	start[1] = NXT(1);
10441 	start[2] = NXT(2);
10442 	start[3] = NXT(3);
10443 	enc = xmlDetectCharEncoding(start, 4);
10444 	if (enc != XML_CHAR_ENCODING_NONE) {
10445 	    xmlSwitchEncoding(ctxt, enc);
10446 	}
10447     }
10448 
10449 
10450     if (CUR == 0) {
10451 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10452     }
10453 
10454     /*
10455      * Check for the XMLDecl in the Prolog.
10456      */
10457     GROW;
10458     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10459 
10460 	/*
10461 	 * Note that we will switch encoding on the fly.
10462 	 */
10463 	xmlParseXMLDecl(ctxt);
10464 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10465 	    /*
10466 	     * The XML REC instructs us to stop parsing right here
10467 	     */
10468 	    return(-1);
10469 	}
10470 	SKIP_BLANKS;
10471     } else {
10472 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10473     }
10474     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10475         ctxt->sax->startDocument(ctxt->userData);
10476 
10477     /*
10478      * Doing validity checking on chunk doesn't make sense
10479      */
10480     ctxt->instate = XML_PARSER_CONTENT;
10481     ctxt->validate = 0;
10482     ctxt->loadsubset = 0;
10483     ctxt->depth = 0;
10484 
10485     xmlParseContent(ctxt);
10486 
10487     if ((RAW == '<') && (NXT(1) == '/')) {
10488 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10489     } else if (RAW != 0) {
10490 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10491     }
10492 
10493     /*
10494      * SAX: end of the document processing.
10495      */
10496     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10497         ctxt->sax->endDocument(ctxt->userData);
10498 
10499     if (! ctxt->wellFormed) return(-1);
10500     return(0);
10501 }
10502 
10503 #ifdef LIBXML_PUSH_ENABLED
10504 /************************************************************************
10505  *									*
10506  * 		Progressive parsing interfaces				*
10507  *									*
10508  ************************************************************************/
10509 
10510 /**
10511  * xmlParseLookupSequence:
10512  * @ctxt:  an XML parser context
10513  * @first:  the first char to lookup
10514  * @next:  the next char to lookup or zero
10515  * @third:  the next char to lookup or zero
10516  *
10517  * Try to find if a sequence (first, next, third) or  just (first next) or
10518  * (first) is available in the input stream.
10519  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10520  * to avoid rescanning sequences of bytes, it DOES change the state of the
10521  * parser, do not use liberally.
10522  *
10523  * Returns the index to the current parsing point if the full sequence
10524  *      is available, -1 otherwise.
10525  */
10526 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10527 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10528                        xmlChar next, xmlChar third) {
10529     int base, len;
10530     xmlParserInputPtr in;
10531     const xmlChar *buf;
10532 
10533     in = ctxt->input;
10534     if (in == NULL) return(-1);
10535     base = in->cur - in->base;
10536     if (base < 0) return(-1);
10537     if (ctxt->checkIndex > base)
10538         base = ctxt->checkIndex;
10539     if (in->buf == NULL) {
10540 	buf = in->base;
10541 	len = in->length;
10542     } else {
10543 	buf = in->buf->buffer->content;
10544 	len = in->buf->buffer->use;
10545     }
10546     /* take into account the sequence length */
10547     if (third) len -= 2;
10548     else if (next) len --;
10549     for (;base < len;base++) {
10550         if (buf[base] == first) {
10551 	    if (third != 0) {
10552 		if ((buf[base + 1] != next) ||
10553 		    (buf[base + 2] != third)) continue;
10554 	    } else if (next != 0) {
10555 		if (buf[base + 1] != next) continue;
10556 	    }
10557 	    ctxt->checkIndex = 0;
10558 #ifdef DEBUG_PUSH
10559 	    if (next == 0)
10560 		xmlGenericError(xmlGenericErrorContext,
10561 			"PP: lookup '%c' found at %d\n",
10562 			first, base);
10563 	    else if (third == 0)
10564 		xmlGenericError(xmlGenericErrorContext,
10565 			"PP: lookup '%c%c' found at %d\n",
10566 			first, next, base);
10567 	    else
10568 		xmlGenericError(xmlGenericErrorContext,
10569 			"PP: lookup '%c%c%c' found at %d\n",
10570 			first, next, third, base);
10571 #endif
10572 	    return(base - (in->cur - in->base));
10573 	}
10574     }
10575     ctxt->checkIndex = base;
10576 #ifdef DEBUG_PUSH
10577     if (next == 0)
10578 	xmlGenericError(xmlGenericErrorContext,
10579 		"PP: lookup '%c' failed\n", first);
10580     else if (third == 0)
10581 	xmlGenericError(xmlGenericErrorContext,
10582 		"PP: lookup '%c%c' failed\n", first, next);
10583     else
10584 	xmlGenericError(xmlGenericErrorContext,
10585 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10586 #endif
10587     return(-1);
10588 }
10589 
10590 /**
10591  * xmlParseGetLasts:
10592  * @ctxt:  an XML parser context
10593  * @lastlt:  pointer to store the last '<' from the input
10594  * @lastgt:  pointer to store the last '>' from the input
10595  *
10596  * Lookup the last < and > in the current chunk
10597  */
10598 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10599 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10600                  const xmlChar **lastgt) {
10601     const xmlChar *tmp;
10602 
10603     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10604 	xmlGenericError(xmlGenericErrorContext,
10605 		    "Internal error: xmlParseGetLasts\n");
10606 	return;
10607     }
10608     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10609         tmp = ctxt->input->end;
10610 	tmp--;
10611 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10612 	if (tmp < ctxt->input->base) {
10613 	    *lastlt = NULL;
10614 	    *lastgt = NULL;
10615 	} else {
10616 	    *lastlt = tmp;
10617 	    tmp++;
10618 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10619 	        if (*tmp == '\'') {
10620 		    tmp++;
10621 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10622 		    if (tmp < ctxt->input->end) tmp++;
10623 		} else if (*tmp == '"') {
10624 		    tmp++;
10625 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10626 		    if (tmp < ctxt->input->end) tmp++;
10627 		} else
10628 		    tmp++;
10629 	    }
10630 	    if (tmp < ctxt->input->end)
10631 	        *lastgt = tmp;
10632 	    else {
10633 	        tmp = *lastlt;
10634 		tmp--;
10635 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10636 		if (tmp >= ctxt->input->base)
10637 		    *lastgt = tmp;
10638 		else
10639 		    *lastgt = NULL;
10640 	    }
10641 	}
10642     } else {
10643         *lastlt = NULL;
10644 	*lastgt = NULL;
10645     }
10646 }
/**
 * xmlCheckCdataPush:
 * @utf: pointer to the block of characters
 * @len: length of the block in bytes
 *
 * Check that the block of characters is okay as CData content [20]
 *
 * Returns the number of bytes to pass if okay, a negative index where an
 *         UTF-8 error occurred otherwise
 */
10657 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10658 xmlCheckCdataPush(const xmlChar *utf, int len) {
10659     int ix;
10660     unsigned char c;
10661     int codepoint;
10662 
10663     if ((utf == NULL) || (len <= 0))
10664         return(0);
10665 
10666     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10667         c = utf[ix];
10668         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10669 	    if (c >= 0x20)
10670 		ix++;
10671 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10672 	        ix++;
10673 	    else
10674 	        return(-ix);
10675 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10676 	    if (ix + 2 > len) return(ix);
10677 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10678 	        return(-ix);
10679 	    codepoint = (utf[ix] & 0x1f) << 6;
10680 	    codepoint |= utf[ix+1] & 0x3f;
10681 	    if (!xmlIsCharQ(codepoint))
10682 	        return(-ix);
10683 	    ix += 2;
10684 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10685 	    if (ix + 3 > len) return(ix);
10686 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10687 	        ((utf[ix+2] & 0xc0) != 0x80))
10688 		    return(-ix);
10689 	    codepoint = (utf[ix] & 0xf) << 12;
10690 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
10691 	    codepoint |= utf[ix+2] & 0x3f;
10692 	    if (!xmlIsCharQ(codepoint))
10693 	        return(-ix);
10694 	    ix += 3;
10695 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10696 	    if (ix + 4 > len) return(ix);
10697 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10698 	        ((utf[ix+2] & 0xc0) != 0x80) ||
10699 		((utf[ix+3] & 0xc0) != 0x80))
10700 		    return(-ix);
10701 	    codepoint = (utf[ix] & 0x7) << 18;
10702 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
10703 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
10704 	    codepoint |= utf[ix+3] & 0x3f;
10705 	    if (!xmlIsCharQ(codepoint))
10706 	        return(-ix);
10707 	    ix += 4;
10708 	} else				/* unknown encoding */
10709 	    return(-ix);
10710       }
10711       return(ix);
10712 }
10713 
10714 /**
10715  * xmlParseTryOrFinish:
10716  * @ctxt:  an XML parser context
10717  * @terminate:  last chunk indicator
10718  *
10719  * Try to progress on parsing
10720  *
10721  * Returns zero if no parsing was possible
10722  */
10723 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10724 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10725     int ret = 0;
10726     int avail, tlen;
10727     xmlChar cur, next;
10728     const xmlChar *lastlt, *lastgt;
10729 
10730     if (ctxt->input == NULL)
10731         return(0);
10732 
10733 #ifdef DEBUG_PUSH
10734     switch (ctxt->instate) {
10735 	case XML_PARSER_EOF:
10736 	    xmlGenericError(xmlGenericErrorContext,
10737 		    "PP: try EOF\n"); break;
10738 	case XML_PARSER_START:
10739 	    xmlGenericError(xmlGenericErrorContext,
10740 		    "PP: try START\n"); break;
10741 	case XML_PARSER_MISC:
10742 	    xmlGenericError(xmlGenericErrorContext,
10743 		    "PP: try MISC\n");break;
10744 	case XML_PARSER_COMMENT:
10745 	    xmlGenericError(xmlGenericErrorContext,
10746 		    "PP: try COMMENT\n");break;
10747 	case XML_PARSER_PROLOG:
10748 	    xmlGenericError(xmlGenericErrorContext,
10749 		    "PP: try PROLOG\n");break;
10750 	case XML_PARSER_START_TAG:
10751 	    xmlGenericError(xmlGenericErrorContext,
10752 		    "PP: try START_TAG\n");break;
10753 	case XML_PARSER_CONTENT:
10754 	    xmlGenericError(xmlGenericErrorContext,
10755 		    "PP: try CONTENT\n");break;
10756 	case XML_PARSER_CDATA_SECTION:
10757 	    xmlGenericError(xmlGenericErrorContext,
10758 		    "PP: try CDATA_SECTION\n");break;
10759 	case XML_PARSER_END_TAG:
10760 	    xmlGenericError(xmlGenericErrorContext,
10761 		    "PP: try END_TAG\n");break;
10762 	case XML_PARSER_ENTITY_DECL:
10763 	    xmlGenericError(xmlGenericErrorContext,
10764 		    "PP: try ENTITY_DECL\n");break;
10765 	case XML_PARSER_ENTITY_VALUE:
10766 	    xmlGenericError(xmlGenericErrorContext,
10767 		    "PP: try ENTITY_VALUE\n");break;
10768 	case XML_PARSER_ATTRIBUTE_VALUE:
10769 	    xmlGenericError(xmlGenericErrorContext,
10770 		    "PP: try ATTRIBUTE_VALUE\n");break;
10771 	case XML_PARSER_DTD:
10772 	    xmlGenericError(xmlGenericErrorContext,
10773 		    "PP: try DTD\n");break;
10774 	case XML_PARSER_EPILOG:
10775 	    xmlGenericError(xmlGenericErrorContext,
10776 		    "PP: try EPILOG\n");break;
10777 	case XML_PARSER_PI:
10778 	    xmlGenericError(xmlGenericErrorContext,
10779 		    "PP: try PI\n");break;
10780         case XML_PARSER_IGNORE:
10781             xmlGenericError(xmlGenericErrorContext,
10782 		    "PP: try IGNORE\n");break;
10783     }
10784 #endif
10785 
10786     if ((ctxt->input != NULL) &&
10787         (ctxt->input->cur - ctxt->input->base > 4096)) {
10788 	xmlSHRINK(ctxt);
10789 	ctxt->checkIndex = 0;
10790     }
10791     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10792 
10793     while (1) {
10794 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10795 	    return(0);
10796 
10797 
10798 	/*
10799 	 * Pop-up of finished entities.
10800 	 */
10801 	while ((RAW == 0) && (ctxt->inputNr > 1))
10802 	    xmlPopInput(ctxt);
10803 
10804 	if (ctxt->input == NULL) break;
10805 	if (ctxt->input->buf == NULL)
10806 	    avail = ctxt->input->length -
10807 	            (ctxt->input->cur - ctxt->input->base);
10808 	else {
10809 	    /*
10810 	     * If we are operating on converted input, try to flush
10811 	     * remainng chars to avoid them stalling in the non-converted
10812 	     * buffer.
10813 	     */
10814 	    if ((ctxt->input->buf->raw != NULL) &&
10815 		(ctxt->input->buf->raw->use > 0)) {
10816 		int base = ctxt->input->base -
10817 		           ctxt->input->buf->buffer->content;
10818 		int current = ctxt->input->cur - ctxt->input->base;
10819 
10820 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10821 		ctxt->input->base = ctxt->input->buf->buffer->content + base;
10822 		ctxt->input->cur = ctxt->input->base + current;
10823 		ctxt->input->end =
10824 		    &ctxt->input->buf->buffer->content[
10825 		                       ctxt->input->buf->buffer->use];
10826 	    }
10827 	    avail = ctxt->input->buf->buffer->use -
10828 		    (ctxt->input->cur - ctxt->input->base);
10829 	}
10830         if (avail < 1)
10831 	    goto done;
10832         switch (ctxt->instate) {
10833             case XML_PARSER_EOF:
10834 	        /*
10835 		 * Document parsing is done !
10836 		 */
10837 	        goto done;
10838             case XML_PARSER_START:
10839 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10840 		    xmlChar start[4];
10841 		    xmlCharEncoding enc;
10842 
10843 		    /*
10844 		     * Very first chars read from the document flow.
10845 		     */
10846 		    if (avail < 4)
10847 			goto done;
10848 
10849 		    /*
10850 		     * Get the 4 first bytes and decode the charset
10851 		     * if enc != XML_CHAR_ENCODING_NONE
10852 		     * plug some encoding conversion routines,
10853 		     * else xmlSwitchEncoding will set to (default)
10854 		     * UTF8.
10855 		     */
10856 		    start[0] = RAW;
10857 		    start[1] = NXT(1);
10858 		    start[2] = NXT(2);
10859 		    start[3] = NXT(3);
10860 		    enc = xmlDetectCharEncoding(start, 4);
10861 		    xmlSwitchEncoding(ctxt, enc);
10862 		    break;
10863 		}
10864 
10865 		if (avail < 2)
10866 		    goto done;
10867 		cur = ctxt->input->cur[0];
10868 		next = ctxt->input->cur[1];
10869 		if (cur == 0) {
10870 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10871 			ctxt->sax->setDocumentLocator(ctxt->userData,
10872 						      &xmlDefaultSAXLocator);
10873 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10874 		    ctxt->instate = XML_PARSER_EOF;
10875 #ifdef DEBUG_PUSH
10876 		    xmlGenericError(xmlGenericErrorContext,
10877 			    "PP: entering EOF\n");
10878 #endif
10879 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10880 			ctxt->sax->endDocument(ctxt->userData);
10881 		    goto done;
10882 		}
10883 	        if ((cur == '<') && (next == '?')) {
10884 		    /* PI or XML decl */
10885 		    if (avail < 5) return(ret);
10886 		    if ((!terminate) &&
10887 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10888 			return(ret);
10889 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10890 			ctxt->sax->setDocumentLocator(ctxt->userData,
10891 						      &xmlDefaultSAXLocator);
10892 		    if ((ctxt->input->cur[2] == 'x') &&
10893 			(ctxt->input->cur[3] == 'm') &&
10894 			(ctxt->input->cur[4] == 'l') &&
10895 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
10896 			ret += 5;
10897 #ifdef DEBUG_PUSH
10898 			xmlGenericError(xmlGenericErrorContext,
10899 				"PP: Parsing XML Decl\n");
10900 #endif
10901 			xmlParseXMLDecl(ctxt);
10902 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10903 			    /*
10904 			     * The XML REC instructs us to stop parsing right
10905 			     * here
10906 			     */
10907 			    ctxt->instate = XML_PARSER_EOF;
10908 			    return(0);
10909 			}
10910 			ctxt->standalone = ctxt->input->standalone;
10911 			if ((ctxt->encoding == NULL) &&
10912 			    (ctxt->input->encoding != NULL))
10913 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10914 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10915 			    (!ctxt->disableSAX))
10916 			    ctxt->sax->startDocument(ctxt->userData);
10917 			ctxt->instate = XML_PARSER_MISC;
10918 #ifdef DEBUG_PUSH
10919 			xmlGenericError(xmlGenericErrorContext,
10920 				"PP: entering MISC\n");
10921 #endif
10922 		    } else {
10923 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10925 			    (!ctxt->disableSAX))
10926 			    ctxt->sax->startDocument(ctxt->userData);
10927 			ctxt->instate = XML_PARSER_MISC;
10928 #ifdef DEBUG_PUSH
10929 			xmlGenericError(xmlGenericErrorContext,
10930 				"PP: entering MISC\n");
10931 #endif
10932 		    }
10933 		} else {
10934 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10935 			ctxt->sax->setDocumentLocator(ctxt->userData,
10936 						      &xmlDefaultSAXLocator);
10937 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10938 		    if (ctxt->version == NULL) {
10939 		        xmlErrMemory(ctxt, NULL);
10940 			break;
10941 		    }
10942 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10943 		        (!ctxt->disableSAX))
10944 			ctxt->sax->startDocument(ctxt->userData);
10945 		    ctxt->instate = XML_PARSER_MISC;
10946 #ifdef DEBUG_PUSH
10947 		    xmlGenericError(xmlGenericErrorContext,
10948 			    "PP: entering MISC\n");
10949 #endif
10950 		}
10951 		break;
10952             case XML_PARSER_START_TAG: {
10953 	        const xmlChar *name;
10954 		const xmlChar *prefix = NULL;
10955 		const xmlChar *URI = NULL;
10956 		int nsNr = ctxt->nsNr;
10957 
10958 		if ((avail < 2) && (ctxt->inputNr == 1))
10959 		    goto done;
10960 		cur = ctxt->input->cur[0];
10961 	        if (cur != '<') {
10962 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10963 		    ctxt->instate = XML_PARSER_EOF;
10964 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965 			ctxt->sax->endDocument(ctxt->userData);
10966 		    goto done;
10967 		}
10968 		if (!terminate) {
10969 		    if (ctxt->progressive) {
10970 		        /* > can be found unescaped in attribute values */
10971 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10972 			    goto done;
10973 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10974 			goto done;
10975 		    }
10976 		}
10977 		if (ctxt->spaceNr == 0)
10978 		    spacePush(ctxt, -1);
10979 		else if (*ctxt->space == -2)
10980 		    spacePush(ctxt, -1);
10981 		else
10982 		    spacePush(ctxt, *ctxt->space);
10983 #ifdef LIBXML_SAX1_ENABLED
10984 		if (ctxt->sax2)
10985 #endif /* LIBXML_SAX1_ENABLED */
10986 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10987 #ifdef LIBXML_SAX1_ENABLED
10988 		else
10989 		    name = xmlParseStartTag(ctxt);
10990 #endif /* LIBXML_SAX1_ENABLED */
10991 		if (ctxt->instate == XML_PARSER_EOF)
10992 		    goto done;
10993 		if (name == NULL) {
10994 		    spacePop(ctxt);
10995 		    ctxt->instate = XML_PARSER_EOF;
10996 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10997 			ctxt->sax->endDocument(ctxt->userData);
10998 		    goto done;
10999 		}
11000 #ifdef LIBXML_VALID_ENABLED
11001 		/*
11002 		 * [ VC: Root Element Type ]
11003 		 * The Name in the document type declaration must match
11004 		 * the element type of the root element.
11005 		 */
11006 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11007 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11008 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11009 #endif /* LIBXML_VALID_ENABLED */
11010 
11011 		/*
11012 		 * Check for an Empty Element.
11013 		 */
11014 		if ((RAW == '/') && (NXT(1) == '>')) {
11015 		    SKIP(2);
11016 
11017 		    if (ctxt->sax2) {
11018 			if ((ctxt->sax != NULL) &&
11019 			    (ctxt->sax->endElementNs != NULL) &&
11020 			    (!ctxt->disableSAX))
11021 			    ctxt->sax->endElementNs(ctxt->userData, name,
11022 			                            prefix, URI);
11023 			if (ctxt->nsNr - nsNr > 0)
11024 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11025 #ifdef LIBXML_SAX1_ENABLED
11026 		    } else {
11027 			if ((ctxt->sax != NULL) &&
11028 			    (ctxt->sax->endElement != NULL) &&
11029 			    (!ctxt->disableSAX))
11030 			    ctxt->sax->endElement(ctxt->userData, name);
11031 #endif /* LIBXML_SAX1_ENABLED */
11032 		    }
11033 		    spacePop(ctxt);
11034 		    if (ctxt->nameNr == 0) {
11035 			ctxt->instate = XML_PARSER_EPILOG;
11036 		    } else {
11037 			ctxt->instate = XML_PARSER_CONTENT;
11038 		    }
11039 		    break;
11040 		}
11041 		if (RAW == '>') {
11042 		    NEXT;
11043 		} else {
11044 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11045 					 "Couldn't find end of Start Tag %s\n",
11046 					 name);
11047 		    nodePop(ctxt);
11048 		    spacePop(ctxt);
11049 		}
11050 		if (ctxt->sax2)
11051 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11052 #ifdef LIBXML_SAX1_ENABLED
11053 		else
11054 		    namePush(ctxt, name);
11055 #endif /* LIBXML_SAX1_ENABLED */
11056 
11057 		ctxt->instate = XML_PARSER_CONTENT;
11058                 break;
11059 	    }
11060             case XML_PARSER_CONTENT: {
11061 		const xmlChar *test;
11062 		unsigned int cons;
11063 		if ((avail < 2) && (ctxt->inputNr == 1))
11064 		    goto done;
11065 		cur = ctxt->input->cur[0];
11066 		next = ctxt->input->cur[1];
11067 
11068 		test = CUR_PTR;
11069 	        cons = ctxt->input->consumed;
11070 		if ((cur == '<') && (next == '/')) {
11071 		    ctxt->instate = XML_PARSER_END_TAG;
11072 		    break;
11073 	        } else if ((cur == '<') && (next == '?')) {
11074 		    if ((!terminate) &&
11075 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11076 			goto done;
11077 		    xmlParsePI(ctxt);
11078 		} else if ((cur == '<') && (next != '!')) {
11079 		    ctxt->instate = XML_PARSER_START_TAG;
11080 		    break;
11081 		} else if ((cur == '<') && (next == '!') &&
11082 		           (ctxt->input->cur[2] == '-') &&
11083 			   (ctxt->input->cur[3] == '-')) {
11084 		    int term;
11085 
11086 	            if (avail < 4)
11087 		        goto done;
11088 		    ctxt->input->cur += 4;
11089 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11090 		    ctxt->input->cur -= 4;
11091 		    if ((!terminate) && (term < 0))
11092 			goto done;
11093 		    xmlParseComment(ctxt);
11094 		    ctxt->instate = XML_PARSER_CONTENT;
11095 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11096 		    (ctxt->input->cur[2] == '[') &&
11097 		    (ctxt->input->cur[3] == 'C') &&
11098 		    (ctxt->input->cur[4] == 'D') &&
11099 		    (ctxt->input->cur[5] == 'A') &&
11100 		    (ctxt->input->cur[6] == 'T') &&
11101 		    (ctxt->input->cur[7] == 'A') &&
11102 		    (ctxt->input->cur[8] == '[')) {
11103 		    SKIP(9);
11104 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11105 		    break;
11106 		} else if ((cur == '<') && (next == '!') &&
11107 		           (avail < 9)) {
11108 		    goto done;
11109 		} else if (cur == '&') {
11110 		    if ((!terminate) &&
11111 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11112 			goto done;
11113 		    xmlParseReference(ctxt);
11114 		} else {
11115 		    /* TODO Avoid the extra copy, handle directly !!! */
11116 		    /*
11117 		     * Goal of the following test is:
11118 		     *  - minimize calls to the SAX 'character' callback
11119 		     *    when they are mergeable
11120 		     *  - handle an problem for isBlank when we only parse
11121 		     *    a sequence of blank chars and the next one is
11122 		     *    not available to check against '<' presence.
11123 		     *  - tries to homogenize the differences in SAX
11124 		     *    callbacks between the push and pull versions
11125 		     *    of the parser.
11126 		     */
11127 		    if ((ctxt->inputNr == 1) &&
11128 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11129 			if (!terminate) {
11130 			    if (ctxt->progressive) {
11131 				if ((lastlt == NULL) ||
11132 				    (ctxt->input->cur > lastlt))
11133 				    goto done;
11134 			    } else if (xmlParseLookupSequence(ctxt,
11135 			                                      '<', 0, 0) < 0) {
11136 				goto done;
11137 			    }
11138 			}
11139                     }
11140 		    ctxt->checkIndex = 0;
11141 		    xmlParseCharData(ctxt, 0);
11142 		}
11143 		/*
11144 		 * Pop-up of finished entities.
11145 		 */
11146 		while ((RAW == 0) && (ctxt->inputNr > 1))
11147 		    xmlPopInput(ctxt);
11148 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11149 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11150 		                "detected an error in element content\n");
11151 		    ctxt->instate = XML_PARSER_EOF;
11152 		    break;
11153 		}
11154 		break;
11155 	    }
11156             case XML_PARSER_END_TAG:
11157 		if (avail < 2)
11158 		    goto done;
11159 		if (!terminate) {
11160 		    if (ctxt->progressive) {
11161 		        /* > can be found unescaped in attribute values */
11162 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11163 			    goto done;
11164 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11165 			goto done;
11166 		    }
11167 		}
11168 		if (ctxt->sax2) {
11169 		    xmlParseEndTag2(ctxt,
11170 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11171 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11172 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11173 		    nameNsPop(ctxt);
11174 		}
11175 #ifdef LIBXML_SAX1_ENABLED
11176 		  else
11177 		    xmlParseEndTag1(ctxt, 0);
11178 #endif /* LIBXML_SAX1_ENABLED */
11179 		if (ctxt->instate == XML_PARSER_EOF) {
11180 		    /* Nothing */
11181 		} else if (ctxt->nameNr == 0) {
11182 		    ctxt->instate = XML_PARSER_EPILOG;
11183 		} else {
11184 		    ctxt->instate = XML_PARSER_CONTENT;
11185 		}
11186 		break;
11187             case XML_PARSER_CDATA_SECTION: {
11188 	        /*
11189 		 * The Push mode need to have the SAX callback for
11190 		 * cdataBlock merge back contiguous callbacks.
11191 		 */
11192 		int base;
11193 
11194 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11195 		if (base < 0) {
11196 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11197 		        int tmp;
11198 
11199 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11200 			                        XML_PARSER_BIG_BUFFER_SIZE);
11201 			if (tmp < 0) {
11202 			    tmp = -tmp;
11203 			    ctxt->input->cur += tmp;
11204 			    goto encoding_error;
11205 			}
11206 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11207 			    if (ctxt->sax->cdataBlock != NULL)
11208 				ctxt->sax->cdataBlock(ctxt->userData,
11209 				                      ctxt->input->cur, tmp);
11210 			    else if (ctxt->sax->characters != NULL)
11211 				ctxt->sax->characters(ctxt->userData,
11212 				                      ctxt->input->cur, tmp);
11213 			}
11214 			SKIPL(tmp);
11215 			ctxt->checkIndex = 0;
11216 		    }
11217 		    goto done;
11218 		} else {
11219 		    int tmp;
11220 
11221 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11222 		    if ((tmp < 0) || (tmp != base)) {
11223 			tmp = -tmp;
11224 			ctxt->input->cur += tmp;
11225 			goto encoding_error;
11226 		    }
11227 		    if ((ctxt->sax != NULL) && (base == 0) &&
11228 		        (ctxt->sax->cdataBlock != NULL) &&
11229 		        (!ctxt->disableSAX)) {
11230 			/*
11231 			 * Special case to provide identical behaviour
11232 			 * between pull and push parsers on enpty CDATA
11233 			 * sections
11234 			 */
11235 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11236 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11237 			               "<![CDATA[", 9)))
11238 			     ctxt->sax->cdataBlock(ctxt->userData,
11239 			                           BAD_CAST "", 0);
11240 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11241 			(!ctxt->disableSAX)) {
11242 			if (ctxt->sax->cdataBlock != NULL)
11243 			    ctxt->sax->cdataBlock(ctxt->userData,
11244 						  ctxt->input->cur, base);
11245 			else if (ctxt->sax->characters != NULL)
11246 			    ctxt->sax->characters(ctxt->userData,
11247 						  ctxt->input->cur, base);
11248 		    }
11249 		    SKIPL(base + 3);
11250 		    ctxt->checkIndex = 0;
11251 		    ctxt->instate = XML_PARSER_CONTENT;
11252 #ifdef DEBUG_PUSH
11253 		    xmlGenericError(xmlGenericErrorContext,
11254 			    "PP: entering CONTENT\n");
11255 #endif
11256 		}
11257 		break;
11258 	    }
11259             case XML_PARSER_MISC:
11260 		SKIP_BLANKS;
11261 		if (ctxt->input->buf == NULL)
11262 		    avail = ctxt->input->length -
11263 		            (ctxt->input->cur - ctxt->input->base);
11264 		else
11265 		    avail = ctxt->input->buf->buffer->use -
11266 		            (ctxt->input->cur - ctxt->input->base);
11267 		if (avail < 2)
11268 		    goto done;
11269 		cur = ctxt->input->cur[0];
11270 		next = ctxt->input->cur[1];
11271 	        if ((cur == '<') && (next == '?')) {
11272 		    if ((!terminate) &&
11273 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11274 			goto done;
11275 #ifdef DEBUG_PUSH
11276 		    xmlGenericError(xmlGenericErrorContext,
11277 			    "PP: Parsing PI\n");
11278 #endif
11279 		    xmlParsePI(ctxt);
11280 		    ctxt->checkIndex = 0;
11281 		} else if ((cur == '<') && (next == '!') &&
11282 		    (ctxt->input->cur[2] == '-') &&
11283 		    (ctxt->input->cur[3] == '-')) {
11284 		    if ((!terminate) &&
11285 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11286 			goto done;
11287 #ifdef DEBUG_PUSH
11288 		    xmlGenericError(xmlGenericErrorContext,
11289 			    "PP: Parsing Comment\n");
11290 #endif
11291 		    xmlParseComment(ctxt);
11292 		    ctxt->instate = XML_PARSER_MISC;
11293 		    ctxt->checkIndex = 0;
11294 		} else if ((cur == '<') && (next == '!') &&
11295 		    (ctxt->input->cur[2] == 'D') &&
11296 		    (ctxt->input->cur[3] == 'O') &&
11297 		    (ctxt->input->cur[4] == 'C') &&
11298 		    (ctxt->input->cur[5] == 'T') &&
11299 		    (ctxt->input->cur[6] == 'Y') &&
11300 		    (ctxt->input->cur[7] == 'P') &&
11301 		    (ctxt->input->cur[8] == 'E')) {
11302 		    if ((!terminate) &&
11303 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11304 			goto done;
11305 #ifdef DEBUG_PUSH
11306 		    xmlGenericError(xmlGenericErrorContext,
11307 			    "PP: Parsing internal subset\n");
11308 #endif
11309 		    ctxt->inSubset = 1;
11310 		    xmlParseDocTypeDecl(ctxt);
11311 		    if (RAW == '[') {
11312 			ctxt->instate = XML_PARSER_DTD;
11313 #ifdef DEBUG_PUSH
11314 			xmlGenericError(xmlGenericErrorContext,
11315 				"PP: entering DTD\n");
11316 #endif
11317 		    } else {
11318 			/*
11319 			 * Create and update the external subset.
11320 			 */
11321 			ctxt->inSubset = 2;
11322 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11323 			    (ctxt->sax->externalSubset != NULL))
11324 			    ctxt->sax->externalSubset(ctxt->userData,
11325 				    ctxt->intSubName, ctxt->extSubSystem,
11326 				    ctxt->extSubURI);
11327 			ctxt->inSubset = 0;
11328 			xmlCleanSpecialAttr(ctxt);
11329 			ctxt->instate = XML_PARSER_PROLOG;
11330 #ifdef DEBUG_PUSH
11331 			xmlGenericError(xmlGenericErrorContext,
11332 				"PP: entering PROLOG\n");
11333 #endif
11334 		    }
11335 		} else if ((cur == '<') && (next == '!') &&
11336 		           (avail < 9)) {
11337 		    goto done;
11338 		} else {
11339 		    ctxt->instate = XML_PARSER_START_TAG;
11340 		    ctxt->progressive = 1;
11341 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11342 #ifdef DEBUG_PUSH
11343 		    xmlGenericError(xmlGenericErrorContext,
11344 			    "PP: entering START_TAG\n");
11345 #endif
11346 		}
11347 		break;
11348             case XML_PARSER_PROLOG:
11349 		SKIP_BLANKS;
11350 		if (ctxt->input->buf == NULL)
11351 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11352 		else
11353 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11354 		if (avail < 2)
11355 		    goto done;
11356 		cur = ctxt->input->cur[0];
11357 		next = ctxt->input->cur[1];
11358 	        if ((cur == '<') && (next == '?')) {
11359 		    if ((!terminate) &&
11360 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11361 			goto done;
11362 #ifdef DEBUG_PUSH
11363 		    xmlGenericError(xmlGenericErrorContext,
11364 			    "PP: Parsing PI\n");
11365 #endif
11366 		    xmlParsePI(ctxt);
11367 		} else if ((cur == '<') && (next == '!') &&
11368 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11369 		    if ((!terminate) &&
11370 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11371 			goto done;
11372 #ifdef DEBUG_PUSH
11373 		    xmlGenericError(xmlGenericErrorContext,
11374 			    "PP: Parsing Comment\n");
11375 #endif
11376 		    xmlParseComment(ctxt);
11377 		    ctxt->instate = XML_PARSER_PROLOG;
11378 		} else if ((cur == '<') && (next == '!') &&
11379 		           (avail < 4)) {
11380 		    goto done;
11381 		} else {
11382 		    ctxt->instate = XML_PARSER_START_TAG;
11383 		    if (ctxt->progressive == 0)
11384 			ctxt->progressive = 1;
11385 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11386 #ifdef DEBUG_PUSH
11387 		    xmlGenericError(xmlGenericErrorContext,
11388 			    "PP: entering START_TAG\n");
11389 #endif
11390 		}
11391 		break;
11392             case XML_PARSER_EPILOG:
11393 		SKIP_BLANKS;
11394 		if (ctxt->input->buf == NULL)
11395 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11396 		else
11397 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11398 		if (avail < 2)
11399 		    goto done;
11400 		cur = ctxt->input->cur[0];
11401 		next = ctxt->input->cur[1];
11402 	        if ((cur == '<') && (next == '?')) {
11403 		    if ((!terminate) &&
11404 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11405 			goto done;
11406 #ifdef DEBUG_PUSH
11407 		    xmlGenericError(xmlGenericErrorContext,
11408 			    "PP: Parsing PI\n");
11409 #endif
11410 		    xmlParsePI(ctxt);
11411 		    ctxt->instate = XML_PARSER_EPILOG;
11412 		} else if ((cur == '<') && (next == '!') &&
11413 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11414 		    if ((!terminate) &&
11415 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11416 			goto done;
11417 #ifdef DEBUG_PUSH
11418 		    xmlGenericError(xmlGenericErrorContext,
11419 			    "PP: Parsing Comment\n");
11420 #endif
11421 		    xmlParseComment(ctxt);
11422 		    ctxt->instate = XML_PARSER_EPILOG;
11423 		} else if ((cur == '<') && (next == '!') &&
11424 		           (avail < 4)) {
11425 		    goto done;
11426 		} else {
11427 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11428 		    ctxt->instate = XML_PARSER_EOF;
11429 #ifdef DEBUG_PUSH
11430 		    xmlGenericError(xmlGenericErrorContext,
11431 			    "PP: entering EOF\n");
11432 #endif
11433 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11434 			ctxt->sax->endDocument(ctxt->userData);
11435 		    goto done;
11436 		}
11437 		break;
11438             case XML_PARSER_DTD: {
11439 	        /*
11440 		 * Sorry but progressive parsing of the internal subset
11441 		 * is not expected to be supported. We first check that
11442 		 * the full content of the internal subset is available and
11443 		 * the parsing is launched only at that point.
11444 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11445 		 * section and not in a ']]>' sequence which are conditional
11446 		 * sections (whoever argued to keep that crap in XML deserve
11447 		 * a place in hell !).
11448 		 */
11449 		int base, i;
11450 		xmlChar *buf;
11451 	        xmlChar quote = 0;
11452 
11453 		base = ctxt->input->cur - ctxt->input->base;
11454 		if (base < 0) return(0);
11455 		if (ctxt->checkIndex > base)
11456 		    base = ctxt->checkIndex;
11457 		buf = ctxt->input->buf->buffer->content;
11458 		for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11459 		     base++) {
11460 		    if (quote != 0) {
11461 		        if (buf[base] == quote)
11462 			    quote = 0;
11463 			continue;
11464 		    }
11465 		    if ((quote == 0) && (buf[base] == '<')) {
11466 		        int found  = 0;
11467 			/* special handling of comments */
11468 		        if (((unsigned int) base + 4 <
11469 			     ctxt->input->buf->buffer->use) &&
11470 			    (buf[base + 1] == '!') &&
11471 			    (buf[base + 2] == '-') &&
11472 			    (buf[base + 3] == '-')) {
11473 			    for (;(unsigned int) base + 3 <
11474 			          ctxt->input->buf->buffer->use; base++) {
11475 				if ((buf[base] == '-') &&
11476 				    (buf[base + 1] == '-') &&
11477 				    (buf[base + 2] == '>')) {
11478 				    found = 1;
11479 				    base += 2;
11480 				    break;
11481 				}
11482 		            }
11483 			    if (!found) {
11484 #if 0
11485 			        fprintf(stderr, "unfinished comment\n");
11486 #endif
11487 			        break; /* for */
11488 		            }
11489 		            continue;
11490 			}
11491 		    }
11492 		    if (buf[base] == '"') {
11493 		        quote = '"';
11494 			continue;
11495 		    }
11496 		    if (buf[base] == '\'') {
11497 		        quote = '\'';
11498 			continue;
11499 		    }
11500 		    if (buf[base] == ']') {
11501 #if 0
11502 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11503 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11504 #endif
11505 		        if ((unsigned int) base +1 >=
11506 		            ctxt->input->buf->buffer->use)
11507 			    break;
11508 			if (buf[base + 1] == ']') {
11509 			    /* conditional crap, skip both ']' ! */
11510 			    base++;
11511 			    continue;
11512 			}
11513 		        for (i = 1;
11514 		     (unsigned int) base + i < ctxt->input->buf->buffer->use;
11515 		             i++) {
11516 			    if (buf[base + i] == '>') {
11517 #if 0
11518 			        fprintf(stderr, "found\n");
11519 #endif
11520 			        goto found_end_int_subset;
11521 			    }
11522 			    if (!IS_BLANK_CH(buf[base + i])) {
11523 #if 0
11524 			        fprintf(stderr, "not found\n");
11525 #endif
11526 			        goto not_end_of_int_subset;
11527 			    }
11528 			}
11529 #if 0
11530 			fprintf(stderr, "end of stream\n");
11531 #endif
11532 		        break;
11533 
11534 		    }
11535 not_end_of_int_subset:
11536                     continue; /* for */
11537 		}
11538 		/*
11539 		 * We didn't found the end of the Internal subset
11540 		 */
11541 #ifdef DEBUG_PUSH
11542 		if (next == 0)
11543 		    xmlGenericError(xmlGenericErrorContext,
11544 			    "PP: lookup of int subset end filed\n");
11545 #endif
11546 	        goto done;
11547 
11548 found_end_int_subset:
11549 		xmlParseInternalSubset(ctxt);
11550 		ctxt->inSubset = 2;
11551 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11552 		    (ctxt->sax->externalSubset != NULL))
11553 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11554 			    ctxt->extSubSystem, ctxt->extSubURI);
11555 		ctxt->inSubset = 0;
11556 		xmlCleanSpecialAttr(ctxt);
11557 		ctxt->instate = XML_PARSER_PROLOG;
11558 		ctxt->checkIndex = 0;
11559 #ifdef DEBUG_PUSH
11560 		xmlGenericError(xmlGenericErrorContext,
11561 			"PP: entering PROLOG\n");
11562 #endif
11563                 break;
11564 	    }
11565             case XML_PARSER_COMMENT:
11566 		xmlGenericError(xmlGenericErrorContext,
11567 			"PP: internal error, state == COMMENT\n");
11568 		ctxt->instate = XML_PARSER_CONTENT;
11569 #ifdef DEBUG_PUSH
11570 		xmlGenericError(xmlGenericErrorContext,
11571 			"PP: entering CONTENT\n");
11572 #endif
11573 		break;
11574             case XML_PARSER_IGNORE:
11575 		xmlGenericError(xmlGenericErrorContext,
11576 			"PP: internal error, state == IGNORE");
11577 	        ctxt->instate = XML_PARSER_DTD;
11578 #ifdef DEBUG_PUSH
11579 		xmlGenericError(xmlGenericErrorContext,
11580 			"PP: entering DTD\n");
11581 #endif
11582 	        break;
11583             case XML_PARSER_PI:
11584 		xmlGenericError(xmlGenericErrorContext,
11585 			"PP: internal error, state == PI\n");
11586 		ctxt->instate = XML_PARSER_CONTENT;
11587 #ifdef DEBUG_PUSH
11588 		xmlGenericError(xmlGenericErrorContext,
11589 			"PP: entering CONTENT\n");
11590 #endif
11591 		break;
11592             case XML_PARSER_ENTITY_DECL:
11593 		xmlGenericError(xmlGenericErrorContext,
11594 			"PP: internal error, state == ENTITY_DECL\n");
11595 		ctxt->instate = XML_PARSER_DTD;
11596 #ifdef DEBUG_PUSH
11597 		xmlGenericError(xmlGenericErrorContext,
11598 			"PP: entering DTD\n");
11599 #endif
11600 		break;
11601             case XML_PARSER_ENTITY_VALUE:
11602 		xmlGenericError(xmlGenericErrorContext,
11603 			"PP: internal error, state == ENTITY_VALUE\n");
11604 		ctxt->instate = XML_PARSER_CONTENT;
11605 #ifdef DEBUG_PUSH
11606 		xmlGenericError(xmlGenericErrorContext,
11607 			"PP: entering DTD\n");
11608 #endif
11609 		break;
11610             case XML_PARSER_ATTRIBUTE_VALUE:
11611 		xmlGenericError(xmlGenericErrorContext,
11612 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
11613 		ctxt->instate = XML_PARSER_START_TAG;
11614 #ifdef DEBUG_PUSH
11615 		xmlGenericError(xmlGenericErrorContext,
11616 			"PP: entering START_TAG\n");
11617 #endif
11618 		break;
11619             case XML_PARSER_SYSTEM_LITERAL:
11620 		xmlGenericError(xmlGenericErrorContext,
11621 			"PP: internal error, state == SYSTEM_LITERAL\n");
11622 		ctxt->instate = XML_PARSER_START_TAG;
11623 #ifdef DEBUG_PUSH
11624 		xmlGenericError(xmlGenericErrorContext,
11625 			"PP: entering START_TAG\n");
11626 #endif
11627 		break;
11628             case XML_PARSER_PUBLIC_LITERAL:
11629 		xmlGenericError(xmlGenericErrorContext,
11630 			"PP: internal error, state == PUBLIC_LITERAL\n");
11631 		ctxt->instate = XML_PARSER_START_TAG;
11632 #ifdef DEBUG_PUSH
11633 		xmlGenericError(xmlGenericErrorContext,
11634 			"PP: entering START_TAG\n");
11635 #endif
11636 		break;
11637 	}
11638     }
11639 done:
11640 #ifdef DEBUG_PUSH
11641     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11642 #endif
11643     return(ret);
11644 encoding_error:
11645     {
11646         char buffer[150];
11647 
11648 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11649 			ctxt->input->cur[0], ctxt->input->cur[1],
11650 			ctxt->input->cur[2], ctxt->input->cur[3]);
11651 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11652 		     "Input is not proper UTF-8, indicate encoding !\n%s",
11653 		     BAD_CAST buffer, NULL);
11654     }
11655     return(0);
11656 }
11657 
11658 /**
11659  * xmlParseChunk:
11660  * @ctxt:  an XML parser context
11661  * @chunk:  an char array
11662  * @size:  the size in byte of the chunk
11663  * @terminate:  last chunk indicator
11664  *
11665  * Parse a Chunk of memory
11666  *
11667  * Returns zero if no error, the xmlParserErrors otherwise.
11668  */
11669 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11670 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11671               int terminate) {
11672     int end_in_lf = 0;
11673     int remain = 0;
11674 
11675     if (ctxt == NULL)
11676         return(XML_ERR_INTERNAL_ERROR);
11677     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11678         return(ctxt->errNo);
11679     if (ctxt->instate == XML_PARSER_START)
11680         xmlDetectSAX2(ctxt);
11681     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11682         (chunk[size - 1] == '\r')) {
11683 	end_in_lf = 1;
11684 	size--;
11685     }
11686 
11687 xmldecl_done:
11688 
11689     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11690         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11691 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11692 	int cur = ctxt->input->cur - ctxt->input->base;
11693 	int res;
11694 
11695         /*
11696          * Specific handling if we autodetected an encoding, we should not
11697          * push more than the first line ... which depend on the encoding
11698          * And only push the rest once the final encoding was detected
11699          */
11700         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11701             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11702             unsigned int len = 45;
11703 
11704             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11705                                BAD_CAST "UTF-16")) ||
11706                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11707                                BAD_CAST "UTF16")))
11708                 len = 90;
11709             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11710                                     BAD_CAST "UCS-4")) ||
11711                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11712                                     BAD_CAST "UCS4")))
11713                 len = 180;
11714 
11715             if (ctxt->input->buf->rawconsumed < len)
11716                 len -= ctxt->input->buf->rawconsumed;
11717 
11718             /*
11719              * Change size for reading the initial declaration only
11720              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11721              * will blindly copy extra bytes from memory.
11722              */
11723             if ((unsigned int) size > len) {
11724                 remain = size - len;
11725                 size = len;
11726             } else {
11727                 remain = 0;
11728             }
11729         }
11730 	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11731 	if (res < 0) {
11732 	    ctxt->errNo = XML_PARSER_EOF;
11733 	    ctxt->disableSAX = 1;
11734 	    return (XML_PARSER_EOF);
11735 	}
11736 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11737 	ctxt->input->cur = ctxt->input->base + cur;
11738 	ctxt->input->end =
11739 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11740 #ifdef DEBUG_PUSH
11741 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11742 #endif
11743 
11744     } else if (ctxt->instate != XML_PARSER_EOF) {
11745 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11746 	    xmlParserInputBufferPtr in = ctxt->input->buf;
11747 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
11748 		    (in->raw != NULL)) {
11749 		int nbchars;
11750 
11751 		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11752 		if (nbchars < 0) {
11753 		    /* TODO 2.6.0 */
11754 		    xmlGenericError(xmlGenericErrorContext,
11755 				    "xmlParseChunk: encoder error\n");
11756 		    return(XML_ERR_INVALID_ENCODING);
11757 		}
11758 	    }
11759 	}
11760     }
11761     if (remain != 0)
11762         xmlParseTryOrFinish(ctxt, 0);
11763     else
11764         xmlParseTryOrFinish(ctxt, terminate);
11765     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11766         return(ctxt->errNo);
11767 
11768     if (remain != 0) {
11769         chunk += size;
11770         size = remain;
11771         remain = 0;
11772         goto xmldecl_done;
11773     }
11774     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11775         (ctxt->input->buf != NULL)) {
11776 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11777     }
11778     if (terminate) {
11779 	/*
11780 	 * Check for termination
11781 	 */
11782 	int avail = 0;
11783 
11784 	if (ctxt->input != NULL) {
11785 	    if (ctxt->input->buf == NULL)
11786 		avail = ctxt->input->length -
11787 			(ctxt->input->cur - ctxt->input->base);
11788 	    else
11789 		avail = ctxt->input->buf->buffer->use -
11790 			(ctxt->input->cur - ctxt->input->base);
11791 	}
11792 
11793 	if ((ctxt->instate != XML_PARSER_EOF) &&
11794 	    (ctxt->instate != XML_PARSER_EPILOG)) {
11795 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11796 	}
11797 	if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11798 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11799 	}
11800 	if (ctxt->instate != XML_PARSER_EOF) {
11801 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11802 		ctxt->sax->endDocument(ctxt->userData);
11803 	}
11804 	ctxt->instate = XML_PARSER_EOF;
11805     }
11806     return((xmlParserErrors) ctxt->errNo);
11807 }
11808 
11809 /************************************************************************
11810  *									*
11811  * 		I/O front end functions to the parser			*
11812  *									*
11813  ************************************************************************/
11814 
11815 /**
11816  * xmlCreatePushParserCtxt:
11817  * @sax:  a SAX handler
11818  * @user_data:  The user data returned on SAX callbacks
11819  * @chunk:  a pointer to an array of chars
11820  * @size:  number of chars in the array
11821  * @filename:  an optional file name or URI
11822  *
11823  * Create a parser context for using the XML parser in push mode.
11824  * If @buffer and @size are non-NULL, the data is used to detect
11825  * the encoding.  The remaining characters will be parsed so they
11826  * don't need to be fed in again through xmlParseChunk.
11827  * To allow content encoding detection, @size should be >= 4
11828  * The value of @filename is used for fetching external entities
11829  * and error/warning reports.
11830  *
11831  * Returns the new parser context or NULL
11832  */
11833 
11834 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11835 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11836                         const char *chunk, int size, const char *filename) {
11837     xmlParserCtxtPtr ctxt;
11838     xmlParserInputPtr inputStream;
11839     xmlParserInputBufferPtr buf;
11840     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11841 
11842     /*
11843      * plug some encoding conversion routines
11844      */
11845     if ((chunk != NULL) && (size >= 4))
11846 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11847 
11848     buf = xmlAllocParserInputBuffer(enc);
11849     if (buf == NULL) return(NULL);
11850 
11851     ctxt = xmlNewParserCtxt();
11852     if (ctxt == NULL) {
11853         xmlErrMemory(NULL, "creating parser: out of memory\n");
11854 	xmlFreeParserInputBuffer(buf);
11855 	return(NULL);
11856     }
11857     ctxt->dictNames = 1;
11858     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11859     if (ctxt->pushTab == NULL) {
11860         xmlErrMemory(ctxt, NULL);
11861 	xmlFreeParserInputBuffer(buf);
11862 	xmlFreeParserCtxt(ctxt);
11863 	return(NULL);
11864     }
11865     if (sax != NULL) {
11866 #ifdef LIBXML_SAX1_ENABLED
11867 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11868 #endif /* LIBXML_SAX1_ENABLED */
11869 	    xmlFree(ctxt->sax);
11870 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11871 	if (ctxt->sax == NULL) {
11872 	    xmlErrMemory(ctxt, NULL);
11873 	    xmlFreeParserInputBuffer(buf);
11874 	    xmlFreeParserCtxt(ctxt);
11875 	    return(NULL);
11876 	}
11877 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11878 	if (sax->initialized == XML_SAX2_MAGIC)
11879 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11880 	else
11881 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11882 	if (user_data != NULL)
11883 	    ctxt->userData = user_data;
11884     }
11885     if (filename == NULL) {
11886 	ctxt->directory = NULL;
11887     } else {
11888         ctxt->directory = xmlParserGetDirectory(filename);
11889     }
11890 
11891     inputStream = xmlNewInputStream(ctxt);
11892     if (inputStream == NULL) {
11893 	xmlFreeParserCtxt(ctxt);
11894 	xmlFreeParserInputBuffer(buf);
11895 	return(NULL);
11896     }
11897 
11898     if (filename == NULL)
11899 	inputStream->filename = NULL;
11900     else {
11901 	inputStream->filename = (char *)
11902 	    xmlCanonicPath((const xmlChar *) filename);
11903 	if (inputStream->filename == NULL) {
11904 	    xmlFreeParserCtxt(ctxt);
11905 	    xmlFreeParserInputBuffer(buf);
11906 	    return(NULL);
11907 	}
11908     }
11909     inputStream->buf = buf;
11910     inputStream->base = inputStream->buf->buffer->content;
11911     inputStream->cur = inputStream->buf->buffer->content;
11912     inputStream->end =
11913 	&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11914 
11915     inputPush(ctxt, inputStream);
11916 
11917     /*
11918      * If the caller didn't provide an initial 'chunk' for determining
11919      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11920      * that it can be automatically determined later
11921      */
11922     if ((size == 0) || (chunk == NULL)) {
11923 	ctxt->charset = XML_CHAR_ENCODING_NONE;
11924     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11925 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11926 	int cur = ctxt->input->cur - ctxt->input->base;
11927 
11928 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11929 
11930 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11931 	ctxt->input->cur = ctxt->input->base + cur;
11932 	ctxt->input->end =
11933 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11934 #ifdef DEBUG_PUSH
11935 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11936 #endif
11937     }
11938 
11939     if (enc != XML_CHAR_ENCODING_NONE) {
11940         xmlSwitchEncoding(ctxt, enc);
11941     }
11942 
11943     return(ctxt);
11944 }
11945 #endif /* LIBXML_PUSH_ENABLED */
11946 
11947 /**
11948  * xmlStopParser:
11949  * @ctxt:  an XML parser context
11950  *
11951  * Blocks further parser processing
11952  */
11953 void
xmlStopParser(xmlParserCtxtPtr ctxt)11954 xmlStopParser(xmlParserCtxtPtr ctxt) {
11955     if (ctxt == NULL)
11956         return;
11957     ctxt->instate = XML_PARSER_EOF;
11958     ctxt->disableSAX = 1;
11959     if (ctxt->input != NULL) {
11960 	ctxt->input->cur = BAD_CAST"";
11961 	ctxt->input->base = ctxt->input->cur;
11962     }
11963 }
11964 
11965 /**
11966  * xmlCreateIOParserCtxt:
11967  * @sax:  a SAX handler
11968  * @user_data:  The user data returned on SAX callbacks
11969  * @ioread:  an I/O read function
11970  * @ioclose:  an I/O close function
11971  * @ioctx:  an I/O handler
11972  * @enc:  the charset encoding if known
11973  *
11974  * Create a parser context for using the XML parser with an existing
11975  * I/O stream
11976  *
11977  * Returns the new parser context or NULL
11978  */
11979 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11980 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11981 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
11982 	void *ioctx, xmlCharEncoding enc) {
11983     xmlParserCtxtPtr ctxt;
11984     xmlParserInputPtr inputStream;
11985     xmlParserInputBufferPtr buf;
11986 
11987     if (ioread == NULL) return(NULL);
11988 
11989     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11990     if (buf == NULL) return(NULL);
11991 
11992     ctxt = xmlNewParserCtxt();
11993     if (ctxt == NULL) {
11994 	xmlFreeParserInputBuffer(buf);
11995 	return(NULL);
11996     }
11997     if (sax != NULL) {
11998 #ifdef LIBXML_SAX1_ENABLED
11999 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12000 #endif /* LIBXML_SAX1_ENABLED */
12001 	    xmlFree(ctxt->sax);
12002 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12003 	if (ctxt->sax == NULL) {
12004 	    xmlErrMemory(ctxt, NULL);
12005 	    xmlFreeParserCtxt(ctxt);
12006 	    return(NULL);
12007 	}
12008 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12009 	if (sax->initialized == XML_SAX2_MAGIC)
12010 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12011 	else
12012 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12013 	if (user_data != NULL)
12014 	    ctxt->userData = user_data;
12015     }
12016 
12017     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12018     if (inputStream == NULL) {
12019 	xmlFreeParserCtxt(ctxt);
12020 	return(NULL);
12021     }
12022     inputPush(ctxt, inputStream);
12023 
12024     return(ctxt);
12025 }
12026 
12027 #ifdef LIBXML_VALID_ENABLED
12028 /************************************************************************
12029  *									*
12030  * 		Front ends when parsing a DTD				*
12031  *									*
12032  ************************************************************************/
12033 
12034 /**
12035  * xmlIOParseDTD:
12036  * @sax:  the SAX handler block or NULL
12037  * @input:  an Input Buffer
12038  * @enc:  the charset encoding if known
12039  *
12040  * Load and parse a DTD
12041  *
12042  * Returns the resulting xmlDtdPtr or NULL in case of error.
12043  * @input will be freed by the function in any case.
12044  */
12045 
12046 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12047 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12048 	      xmlCharEncoding enc) {
12049     xmlDtdPtr ret = NULL;
12050     xmlParserCtxtPtr ctxt;
12051     xmlParserInputPtr pinput = NULL;
12052     xmlChar start[4];
12053 
12054     if (input == NULL)
12055 	return(NULL);
12056 
12057     ctxt = xmlNewParserCtxt();
12058     if (ctxt == NULL) {
12059         xmlFreeParserInputBuffer(input);
12060 	return(NULL);
12061     }
12062 
12063     /*
12064      * Set-up the SAX context
12065      */
12066     if (sax != NULL) {
12067 	if (ctxt->sax != NULL)
12068 	    xmlFree(ctxt->sax);
12069         ctxt->sax = sax;
12070         ctxt->userData = ctxt;
12071     }
12072     xmlDetectSAX2(ctxt);
12073 
12074     /*
12075      * generate a parser input from the I/O handler
12076      */
12077 
12078     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12079     if (pinput == NULL) {
12080         if (sax != NULL) ctxt->sax = NULL;
12081         xmlFreeParserInputBuffer(input);
12082 	xmlFreeParserCtxt(ctxt);
12083 	return(NULL);
12084     }
12085 
12086     /*
12087      * plug some encoding conversion routines here.
12088      */
12089     if (xmlPushInput(ctxt, pinput) < 0) {
12090         if (sax != NULL) ctxt->sax = NULL;
12091 	xmlFreeParserCtxt(ctxt);
12092 	return(NULL);
12093     }
12094     if (enc != XML_CHAR_ENCODING_NONE) {
12095         xmlSwitchEncoding(ctxt, enc);
12096     }
12097 
12098     pinput->filename = NULL;
12099     pinput->line = 1;
12100     pinput->col = 1;
12101     pinput->base = ctxt->input->cur;
12102     pinput->cur = ctxt->input->cur;
12103     pinput->free = NULL;
12104 
12105     /*
12106      * let's parse that entity knowing it's an external subset.
12107      */
12108     ctxt->inSubset = 2;
12109     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12110     if (ctxt->myDoc == NULL) {
12111 	xmlErrMemory(ctxt, "New Doc failed");
12112 	return(NULL);
12113     }
12114     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12115     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12116 	                               BAD_CAST "none", BAD_CAST "none");
12117 
12118     if ((enc == XML_CHAR_ENCODING_NONE) &&
12119         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12120 	/*
12121 	 * Get the 4 first bytes and decode the charset
12122 	 * if enc != XML_CHAR_ENCODING_NONE
12123 	 * plug some encoding conversion routines.
12124 	 */
12125 	start[0] = RAW;
12126 	start[1] = NXT(1);
12127 	start[2] = NXT(2);
12128 	start[3] = NXT(3);
12129 	enc = xmlDetectCharEncoding(start, 4);
12130 	if (enc != XML_CHAR_ENCODING_NONE) {
12131 	    xmlSwitchEncoding(ctxt, enc);
12132 	}
12133     }
12134 
12135     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12136 
12137     if (ctxt->myDoc != NULL) {
12138 	if (ctxt->wellFormed) {
12139 	    ret = ctxt->myDoc->extSubset;
12140 	    ctxt->myDoc->extSubset = NULL;
12141 	    if (ret != NULL) {
12142 		xmlNodePtr tmp;
12143 
12144 		ret->doc = NULL;
12145 		tmp = ret->children;
12146 		while (tmp != NULL) {
12147 		    tmp->doc = NULL;
12148 		    tmp = tmp->next;
12149 		}
12150 	    }
12151 	} else {
12152 	    ret = NULL;
12153 	}
12154         xmlFreeDoc(ctxt->myDoc);
12155         ctxt->myDoc = NULL;
12156     }
12157     if (sax != NULL) ctxt->sax = NULL;
12158     xmlFreeParserCtxt(ctxt);
12159 
12160     return(ret);
12161 }
12162 
12163 /**
12164  * xmlSAXParseDTD:
12165  * @sax:  the SAX handler block
12166  * @ExternalID:  a NAME* containing the External ID of the DTD
12167  * @SystemID:  a NAME* containing the URL to the DTD
12168  *
12169  * Load and parse an external subset.
12170  *
12171  * Returns the resulting xmlDtdPtr or NULL in case of error.
12172  */
12173 
12174 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12175 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12176                           const xmlChar *SystemID) {
12177     xmlDtdPtr ret = NULL;
12178     xmlParserCtxtPtr ctxt;
12179     xmlParserInputPtr input = NULL;
12180     xmlCharEncoding enc;
12181     xmlChar* systemIdCanonic;
12182 
12183     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12184 
12185     ctxt = xmlNewParserCtxt();
12186     if (ctxt == NULL) {
12187 	return(NULL);
12188     }
12189 
12190     /*
12191      * Set-up the SAX context
12192      */
12193     if (sax != NULL) {
12194 	if (ctxt->sax != NULL)
12195 	    xmlFree(ctxt->sax);
12196         ctxt->sax = sax;
12197         ctxt->userData = ctxt;
12198     }
12199 
12200     /*
12201      * Canonicalise the system ID
12202      */
12203     systemIdCanonic = xmlCanonicPath(SystemID);
12204     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12205 	xmlFreeParserCtxt(ctxt);
12206 	return(NULL);
12207     }
12208 
12209     /*
12210      * Ask the Entity resolver to load the damn thing
12211      */
12212 
12213     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12214 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12215 	                                 systemIdCanonic);
12216     if (input == NULL) {
12217         if (sax != NULL) ctxt->sax = NULL;
12218 	xmlFreeParserCtxt(ctxt);
12219 	if (systemIdCanonic != NULL)
12220 	    xmlFree(systemIdCanonic);
12221 	return(NULL);
12222     }
12223 
12224     /*
12225      * plug some encoding conversion routines here.
12226      */
12227     if (xmlPushInput(ctxt, input) < 0) {
12228         if (sax != NULL) ctxt->sax = NULL;
12229 	xmlFreeParserCtxt(ctxt);
12230 	if (systemIdCanonic != NULL)
12231 	    xmlFree(systemIdCanonic);
12232 	return(NULL);
12233     }
12234     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12235 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12236 	xmlSwitchEncoding(ctxt, enc);
12237     }
12238 
12239     if (input->filename == NULL)
12240 	input->filename = (char *) systemIdCanonic;
12241     else
12242 	xmlFree(systemIdCanonic);
12243     input->line = 1;
12244     input->col = 1;
12245     input->base = ctxt->input->cur;
12246     input->cur = ctxt->input->cur;
12247     input->free = NULL;
12248 
12249     /*
12250      * let's parse that entity knowing it's an external subset.
12251      */
12252     ctxt->inSubset = 2;
12253     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12254     if (ctxt->myDoc == NULL) {
12255 	xmlErrMemory(ctxt, "New Doc failed");
12256         if (sax != NULL) ctxt->sax = NULL;
12257 	xmlFreeParserCtxt(ctxt);
12258 	return(NULL);
12259     }
12260     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12261     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12262 	                               ExternalID, SystemID);
12263     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12264 
12265     if (ctxt->myDoc != NULL) {
12266 	if (ctxt->wellFormed) {
12267 	    ret = ctxt->myDoc->extSubset;
12268 	    ctxt->myDoc->extSubset = NULL;
12269 	    if (ret != NULL) {
12270 		xmlNodePtr tmp;
12271 
12272 		ret->doc = NULL;
12273 		tmp = ret->children;
12274 		while (tmp != NULL) {
12275 		    tmp->doc = NULL;
12276 		    tmp = tmp->next;
12277 		}
12278 	    }
12279 	} else {
12280 	    ret = NULL;
12281 	}
12282         xmlFreeDoc(ctxt->myDoc);
12283         ctxt->myDoc = NULL;
12284     }
12285     if (sax != NULL) ctxt->sax = NULL;
12286     xmlFreeParserCtxt(ctxt);
12287 
12288     return(ret);
12289 }
12290 
12291 
12292 /**
12293  * xmlParseDTD:
12294  * @ExternalID:  a NAME* containing the External ID of the DTD
12295  * @SystemID:  a NAME* containing the URL to the DTD
12296  *
12297  * Load and parse an external subset.
12298  *
12299  * Returns the resulting xmlDtdPtr or NULL in case of error.
12300  */
12301 
12302 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12303 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12304     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12305 }
12306 #endif /* LIBXML_VALID_ENABLED */
12307 
12308 /************************************************************************
12309  *									*
12310  * 		Front ends when parsing an Entity			*
12311  *									*
12312  ************************************************************************/
12313 
12314 /**
12315  * xmlParseCtxtExternalEntity:
12316  * @ctx:  the existing parsing context
12317  * @URL:  the URL for the entity to load
12318  * @ID:  the System ID for the entity to load
12319  * @lst:  the return value for the set of parsed nodes
12320  *
12321  * Parse an external general entity within an existing parsing context
12322  * An external general parsed entity is well-formed if it matches the
12323  * production labeled extParsedEnt.
12324  *
12325  * [78] extParsedEnt ::= TextDecl? content
12326  *
12327  * Returns 0 if the entity is well formed, -1 in case of args problem and
12328  *    the parser error code otherwise
12329  */
12330 
12331 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12332 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12333 	               const xmlChar *ID, xmlNodePtr *lst) {
12334     xmlParserCtxtPtr ctxt;
12335     xmlDocPtr newDoc;
12336     xmlNodePtr newRoot;
12337     xmlSAXHandlerPtr oldsax = NULL;
12338     int ret = 0;
12339     xmlChar start[4];
12340     xmlCharEncoding enc;
12341 
12342     if (ctx == NULL) return(-1);
12343 
12344     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12345         (ctx->depth > 1024)) {
12346 	return(XML_ERR_ENTITY_LOOP);
12347     }
12348 
12349     if (lst != NULL)
12350         *lst = NULL;
12351     if ((URL == NULL) && (ID == NULL))
12352 	return(-1);
12353     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12354 	return(-1);
12355 
12356     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12357     if (ctxt == NULL) {
12358 	return(-1);
12359     }
12360 
12361     oldsax = ctxt->sax;
12362     ctxt->sax = ctx->sax;
12363     xmlDetectSAX2(ctxt);
12364     newDoc = xmlNewDoc(BAD_CAST "1.0");
12365     if (newDoc == NULL) {
12366 	xmlFreeParserCtxt(ctxt);
12367 	return(-1);
12368     }
12369     newDoc->properties = XML_DOC_INTERNAL;
12370     if (ctx->myDoc->dict) {
12371 	newDoc->dict = ctx->myDoc->dict;
12372 	xmlDictReference(newDoc->dict);
12373     }
12374     if (ctx->myDoc != NULL) {
12375 	newDoc->intSubset = ctx->myDoc->intSubset;
12376 	newDoc->extSubset = ctx->myDoc->extSubset;
12377     }
12378     if (ctx->myDoc->URL != NULL) {
12379 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12380     }
12381     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12382     if (newRoot == NULL) {
12383 	ctxt->sax = oldsax;
12384 	xmlFreeParserCtxt(ctxt);
12385 	newDoc->intSubset = NULL;
12386 	newDoc->extSubset = NULL;
12387         xmlFreeDoc(newDoc);
12388 	return(-1);
12389     }
12390     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12391     nodePush(ctxt, newDoc->children);
12392     if (ctx->myDoc == NULL) {
12393 	ctxt->myDoc = newDoc;
12394     } else {
12395 	ctxt->myDoc = ctx->myDoc;
12396 	newDoc->children->doc = ctx->myDoc;
12397     }
12398 
12399     /*
12400      * Get the 4 first bytes and decode the charset
12401      * if enc != XML_CHAR_ENCODING_NONE
12402      * plug some encoding conversion routines.
12403      */
12404     GROW
12405     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12406 	start[0] = RAW;
12407 	start[1] = NXT(1);
12408 	start[2] = NXT(2);
12409 	start[3] = NXT(3);
12410 	enc = xmlDetectCharEncoding(start, 4);
12411 	if (enc != XML_CHAR_ENCODING_NONE) {
12412 	    xmlSwitchEncoding(ctxt, enc);
12413 	}
12414     }
12415 
12416     /*
12417      * Parse a possible text declaration first
12418      */
12419     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12420 	xmlParseTextDecl(ctxt);
12421 	/*
12422 	 * An XML-1.0 document can't reference an entity not XML-1.0
12423 	 */
12424 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12425 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12426 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12427 	                   "Version mismatch between document and entity\n");
12428 	}
12429     }
12430 
12431     /*
12432      * Doing validity checking on chunk doesn't make sense
12433      */
12434     ctxt->instate = XML_PARSER_CONTENT;
12435     ctxt->validate = ctx->validate;
12436     ctxt->valid = ctx->valid;
12437     ctxt->loadsubset = ctx->loadsubset;
12438     ctxt->depth = ctx->depth + 1;
12439     ctxt->replaceEntities = ctx->replaceEntities;
12440     if (ctxt->validate) {
12441 	ctxt->vctxt.error = ctx->vctxt.error;
12442 	ctxt->vctxt.warning = ctx->vctxt.warning;
12443     } else {
12444 	ctxt->vctxt.error = NULL;
12445 	ctxt->vctxt.warning = NULL;
12446     }
12447     ctxt->vctxt.nodeTab = NULL;
12448     ctxt->vctxt.nodeNr = 0;
12449     ctxt->vctxt.nodeMax = 0;
12450     ctxt->vctxt.node = NULL;
12451     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12452     ctxt->dict = ctx->dict;
12453     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12454     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12455     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12456     ctxt->dictNames = ctx->dictNames;
12457     ctxt->attsDefault = ctx->attsDefault;
12458     ctxt->attsSpecial = ctx->attsSpecial;
12459     ctxt->linenumbers = ctx->linenumbers;
12460 
12461     xmlParseContent(ctxt);
12462 
12463     ctx->validate = ctxt->validate;
12464     ctx->valid = ctxt->valid;
12465     if ((RAW == '<') && (NXT(1) == '/')) {
12466 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12467     } else if (RAW != 0) {
12468 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12469     }
12470     if (ctxt->node != newDoc->children) {
12471 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12472     }
12473 
12474     if (!ctxt->wellFormed) {
12475         if (ctxt->errNo == 0)
12476 	    ret = 1;
12477 	else
12478 	    ret = ctxt->errNo;
12479     } else {
12480 	if (lst != NULL) {
12481 	    xmlNodePtr cur;
12482 
12483 	    /*
12484 	     * Return the newly created nodeset after unlinking it from
12485 	     * they pseudo parent.
12486 	     */
12487 	    cur = newDoc->children->children;
12488 	    *lst = cur;
12489 	    while (cur != NULL) {
12490 		cur->parent = NULL;
12491 		cur = cur->next;
12492 	    }
12493             newDoc->children->children = NULL;
12494 	}
12495 	ret = 0;
12496     }
12497     ctxt->sax = oldsax;
12498     ctxt->dict = NULL;
12499     ctxt->attsDefault = NULL;
12500     ctxt->attsSpecial = NULL;
12501     xmlFreeParserCtxt(ctxt);
12502     newDoc->intSubset = NULL;
12503     newDoc->extSubset = NULL;
12504     xmlFreeDoc(newDoc);
12505 
12506     return(ret);
12507 }
12508 
12509 /**
12510  * xmlParseExternalEntityPrivate:
12511  * @doc:  the document the chunk pertains to
12512  * @oldctxt:  the previous parser context if available
12513  * @sax:  the SAX handler bloc (possibly NULL)
12514  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12515  * @depth:  Used for loop detection, use 0
12516  * @URL:  the URL for the entity to load
12517  * @ID:  the System ID for the entity to load
12518  * @list:  the return value for the set of parsed nodes
12519  *
12520  * Private version of xmlParseExternalEntity()
12521  *
12522  * Returns 0 if the entity is well formed, -1 in case of args problem and
12523  *    the parser error code otherwise
12524  */
12525 
12526 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12527 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12528 	              xmlSAXHandlerPtr sax,
12529 		      void *user_data, int depth, const xmlChar *URL,
12530 		      const xmlChar *ID, xmlNodePtr *list) {
12531     xmlParserCtxtPtr ctxt;
12532     xmlDocPtr newDoc;
12533     xmlNodePtr newRoot;
12534     xmlSAXHandlerPtr oldsax = NULL;
12535     xmlParserErrors ret = XML_ERR_OK;
12536     xmlChar start[4];
12537     xmlCharEncoding enc;
12538 
12539     if (((depth > 40) &&
12540 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12541 	(depth > 1024)) {
12542 	return(XML_ERR_ENTITY_LOOP);
12543     }
12544 
12545     if (list != NULL)
12546         *list = NULL;
12547     if ((URL == NULL) && (ID == NULL))
12548 	return(XML_ERR_INTERNAL_ERROR);
12549     if (doc == NULL)
12550 	return(XML_ERR_INTERNAL_ERROR);
12551 
12552 
12553     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12554     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12555     ctxt->userData = ctxt;
12556     if (oldctxt != NULL) {
12557 	ctxt->_private = oldctxt->_private;
12558 	ctxt->loadsubset = oldctxt->loadsubset;
12559 	ctxt->validate = oldctxt->validate;
12560 	ctxt->external = oldctxt->external;
12561 	ctxt->record_info = oldctxt->record_info;
12562 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12563 	ctxt->node_seq.length = oldctxt->node_seq.length;
12564 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12565     } else {
12566 	/*
12567 	 * Doing validity checking on chunk without context
12568 	 * doesn't make sense
12569 	 */
12570 	ctxt->_private = NULL;
12571 	ctxt->validate = 0;
12572 	ctxt->external = 2;
12573 	ctxt->loadsubset = 0;
12574     }
12575     if (sax != NULL) {
12576 	oldsax = ctxt->sax;
12577         ctxt->sax = sax;
12578 	if (user_data != NULL)
12579 	    ctxt->userData = user_data;
12580     }
12581     xmlDetectSAX2(ctxt);
12582     newDoc = xmlNewDoc(BAD_CAST "1.0");
12583     if (newDoc == NULL) {
12584 	ctxt->node_seq.maximum = 0;
12585 	ctxt->node_seq.length = 0;
12586 	ctxt->node_seq.buffer = NULL;
12587 	xmlFreeParserCtxt(ctxt);
12588 	return(XML_ERR_INTERNAL_ERROR);
12589     }
12590     newDoc->properties = XML_DOC_INTERNAL;
12591     newDoc->intSubset = doc->intSubset;
12592     newDoc->extSubset = doc->extSubset;
12593     newDoc->dict = doc->dict;
12594     xmlDictReference(newDoc->dict);
12595 
12596     if (doc->URL != NULL) {
12597 	newDoc->URL = xmlStrdup(doc->URL);
12598     }
12599     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12600     if (newRoot == NULL) {
12601 	if (sax != NULL)
12602 	    ctxt->sax = oldsax;
12603 	ctxt->node_seq.maximum = 0;
12604 	ctxt->node_seq.length = 0;
12605 	ctxt->node_seq.buffer = NULL;
12606 	xmlFreeParserCtxt(ctxt);
12607 	newDoc->intSubset = NULL;
12608 	newDoc->extSubset = NULL;
12609         xmlFreeDoc(newDoc);
12610 	return(XML_ERR_INTERNAL_ERROR);
12611     }
12612     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12613     nodePush(ctxt, newDoc->children);
12614     ctxt->myDoc = doc;
12615     newRoot->doc = doc;
12616 
12617     /*
12618      * Get the 4 first bytes and decode the charset
12619      * if enc != XML_CHAR_ENCODING_NONE
12620      * plug some encoding conversion routines.
12621      */
12622     GROW;
12623     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12624 	start[0] = RAW;
12625 	start[1] = NXT(1);
12626 	start[2] = NXT(2);
12627 	start[3] = NXT(3);
12628 	enc = xmlDetectCharEncoding(start, 4);
12629 	if (enc != XML_CHAR_ENCODING_NONE) {
12630 	    xmlSwitchEncoding(ctxt, enc);
12631 	}
12632     }
12633 
12634     /*
12635      * Parse a possible text declaration first
12636      */
12637     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12638 	xmlParseTextDecl(ctxt);
12639     }
12640 
12641     ctxt->instate = XML_PARSER_CONTENT;
12642     ctxt->depth = depth;
12643 
12644     xmlParseContent(ctxt);
12645 
12646     if ((RAW == '<') && (NXT(1) == '/')) {
12647 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12648     } else if (RAW != 0) {
12649 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12650     }
12651     if (ctxt->node != newDoc->children) {
12652 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12653     }
12654 
12655     if (!ctxt->wellFormed) {
12656         if (ctxt->errNo == 0)
12657 	    ret = XML_ERR_INTERNAL_ERROR;
12658 	else
12659 	    ret = (xmlParserErrors)ctxt->errNo;
12660     } else {
12661 	if (list != NULL) {
12662 	    xmlNodePtr cur;
12663 
12664 	    /*
12665 	     * Return the newly created nodeset after unlinking it from
12666 	     * they pseudo parent.
12667 	     */
12668 	    cur = newDoc->children->children;
12669 	    *list = cur;
12670 	    while (cur != NULL) {
12671 		cur->parent = NULL;
12672 		cur = cur->next;
12673 	    }
12674             newDoc->children->children = NULL;
12675 	}
12676 	ret = XML_ERR_OK;
12677     }
12678 
12679     /*
12680      * Record in the parent context the number of entities replacement
12681      * done when parsing that reference.
12682      */
12683     if (oldctxt != NULL)
12684         oldctxt->nbentities += ctxt->nbentities;
12685 
12686     /*
12687      * Also record the size of the entity parsed
12688      */
12689     if (ctxt->input != NULL) {
12690 	oldctxt->sizeentities += ctxt->input->consumed;
12691 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12692     }
12693     /*
12694      * And record the last error if any
12695      */
12696     if (ctxt->lastError.code != XML_ERR_OK)
12697         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12698 
12699     if (sax != NULL)
12700 	ctxt->sax = oldsax;
12701     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12702     oldctxt->node_seq.length = ctxt->node_seq.length;
12703     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12704     ctxt->node_seq.maximum = 0;
12705     ctxt->node_seq.length = 0;
12706     ctxt->node_seq.buffer = NULL;
12707     xmlFreeParserCtxt(ctxt);
12708     newDoc->intSubset = NULL;
12709     newDoc->extSubset = NULL;
12710     xmlFreeDoc(newDoc);
12711 
12712     return(ret);
12713 }
12714 
12715 #ifdef LIBXML_SAX1_ENABLED
12716 /**
12717  * xmlParseExternalEntity:
12718  * @doc:  the document the chunk pertains to
12719  * @sax:  the SAX handler bloc (possibly NULL)
12720  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12721  * @depth:  Used for loop detection, use 0
12722  * @URL:  the URL for the entity to load
12723  * @ID:  the System ID for the entity to load
12724  * @lst:  the return value for the set of parsed nodes
12725  *
12726  * Parse an external general entity
12727  * An external general parsed entity is well-formed if it matches the
12728  * production labeled extParsedEnt.
12729  *
12730  * [78] extParsedEnt ::= TextDecl? content
12731  *
12732  * Returns 0 if the entity is well formed, -1 in case of args problem and
12733  *    the parser error code otherwise
12734  */
12735 
12736 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12737 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12738 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12739     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12740 		                       ID, lst));
12741 }
12742 
12743 /**
12744  * xmlParseBalancedChunkMemory:
12745  * @doc:  the document the chunk pertains to
12746  * @sax:  the SAX handler bloc (possibly NULL)
12747  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12748  * @depth:  Used for loop detection, use 0
12749  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12750  * @lst:  the return value for the set of parsed nodes
12751  *
12752  * Parse a well-balanced chunk of an XML document
12753  * called by the parser
12754  * The allowed sequence for the Well Balanced Chunk is the one defined by
12755  * the content production in the XML grammar:
12756  *
12757  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12758  *
12759  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12760  *    the parser error code otherwise
12761  */
12762 
12763 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12764 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12765      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12766     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12767                                                 depth, string, lst, 0 );
12768 }
12769 #endif /* LIBXML_SAX1_ENABLED */
12770 
12771 /**
12772  * xmlParseBalancedChunkMemoryInternal:
12773  * @oldctxt:  the existing parsing context
12774  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12775  * @user_data:  the user data field for the parser context
12776  * @lst:  the return value for the set of parsed nodes
12777  *
12778  *
12779  * Parse a well-balanced chunk of an XML document
12780  * called by the parser
12781  * The allowed sequence for the Well Balanced Chunk is the one defined by
12782  * the content production in the XML grammar:
12783  *
12784  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12785  *
12786  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12787  * error code otherwise
12788  *
12789  * In case recover is set to 1, the nodelist will not be empty even if
12790  * the parsed chunk is not well balanced.
12791  */
12792 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12793 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12794 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12795     xmlParserCtxtPtr ctxt;
12796     xmlDocPtr newDoc = NULL;
12797     xmlNodePtr newRoot;
12798     xmlSAXHandlerPtr oldsax = NULL;
12799     xmlNodePtr content = NULL;
12800     xmlNodePtr last = NULL;
12801     int size;
12802     xmlParserErrors ret = XML_ERR_OK;
12803 #ifdef SAX2
12804     int i;
12805 #endif
12806 
12807     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12808         (oldctxt->depth >  1024)) {
12809 	return(XML_ERR_ENTITY_LOOP);
12810     }
12811 
12812 
12813     if (lst != NULL)
12814         *lst = NULL;
12815     if (string == NULL)
12816         return(XML_ERR_INTERNAL_ERROR);
12817 
12818     size = xmlStrlen(string);
12819 
12820     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12821     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12822     if (user_data != NULL)
12823 	ctxt->userData = user_data;
12824     else
12825 	ctxt->userData = ctxt;
12826     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12827     ctxt->dict = oldctxt->dict;
12828     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12829     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12830     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12831 
12832 #ifdef SAX2
12833     /* propagate namespaces down the entity */
12834     for (i = 0;i < oldctxt->nsNr;i += 2) {
12835         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12836     }
12837 #endif
12838 
12839     oldsax = ctxt->sax;
12840     ctxt->sax = oldctxt->sax;
12841     xmlDetectSAX2(ctxt);
12842     ctxt->replaceEntities = oldctxt->replaceEntities;
12843     ctxt->options = oldctxt->options;
12844 
12845     ctxt->_private = oldctxt->_private;
12846     if (oldctxt->myDoc == NULL) {
12847 	newDoc = xmlNewDoc(BAD_CAST "1.0");
12848 	if (newDoc == NULL) {
12849 	    ctxt->sax = oldsax;
12850 	    ctxt->dict = NULL;
12851 	    xmlFreeParserCtxt(ctxt);
12852 	    return(XML_ERR_INTERNAL_ERROR);
12853 	}
12854 	newDoc->properties = XML_DOC_INTERNAL;
12855 	newDoc->dict = ctxt->dict;
12856 	xmlDictReference(newDoc->dict);
12857 	ctxt->myDoc = newDoc;
12858     } else {
12859 	ctxt->myDoc = oldctxt->myDoc;
12860         content = ctxt->myDoc->children;
12861 	last = ctxt->myDoc->last;
12862     }
12863     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12864     if (newRoot == NULL) {
12865 	ctxt->sax = oldsax;
12866 	ctxt->dict = NULL;
12867 	xmlFreeParserCtxt(ctxt);
12868 	if (newDoc != NULL) {
12869 	    xmlFreeDoc(newDoc);
12870 	}
12871 	return(XML_ERR_INTERNAL_ERROR);
12872     }
12873     ctxt->myDoc->children = NULL;
12874     ctxt->myDoc->last = NULL;
12875     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12876     nodePush(ctxt, ctxt->myDoc->children);
12877     ctxt->instate = XML_PARSER_CONTENT;
12878     ctxt->depth = oldctxt->depth + 1;
12879 
12880     ctxt->validate = 0;
12881     ctxt->loadsubset = oldctxt->loadsubset;
12882     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12883 	/*
12884 	 * ID/IDREF registration will be done in xmlValidateElement below
12885 	 */
12886 	ctxt->loadsubset |= XML_SKIP_IDS;
12887     }
12888     ctxt->dictNames = oldctxt->dictNames;
12889     ctxt->attsDefault = oldctxt->attsDefault;
12890     ctxt->attsSpecial = oldctxt->attsSpecial;
12891 
12892     xmlParseContent(ctxt);
12893     if ((RAW == '<') && (NXT(1) == '/')) {
12894 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12895     } else if (RAW != 0) {
12896 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12897     }
12898     if (ctxt->node != ctxt->myDoc->children) {
12899 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12900     }
12901 
12902     if (!ctxt->wellFormed) {
12903         if (ctxt->errNo == 0)
12904 	    ret = XML_ERR_INTERNAL_ERROR;
12905 	else
12906 	    ret = (xmlParserErrors)ctxt->errNo;
12907     } else {
12908       ret = XML_ERR_OK;
12909     }
12910 
12911     if ((lst != NULL) && (ret == XML_ERR_OK)) {
12912 	xmlNodePtr cur;
12913 
12914 	/*
12915 	 * Return the newly created nodeset after unlinking it from
12916 	 * they pseudo parent.
12917 	 */
12918 	cur = ctxt->myDoc->children->children;
12919 	*lst = cur;
12920 	while (cur != NULL) {
12921 #ifdef LIBXML_VALID_ENABLED
12922 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12923 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12924 		(cur->type == XML_ELEMENT_NODE)) {
12925 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12926 			oldctxt->myDoc, cur);
12927 	    }
12928 #endif /* LIBXML_VALID_ENABLED */
12929 	    cur->parent = NULL;
12930 	    cur = cur->next;
12931 	}
12932 	ctxt->myDoc->children->children = NULL;
12933     }
12934     if (ctxt->myDoc != NULL) {
12935 	xmlFreeNode(ctxt->myDoc->children);
12936         ctxt->myDoc->children = content;
12937         ctxt->myDoc->last = last;
12938     }
12939 
12940     /*
12941      * Record in the parent context the number of entities replacement
12942      * done when parsing that reference.
12943      */
12944     if (oldctxt != NULL)
12945         oldctxt->nbentities += ctxt->nbentities;
12946 
12947     /*
12948      * Also record the last error if any
12949      */
12950     if (ctxt->lastError.code != XML_ERR_OK)
12951         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12952 
12953     ctxt->sax = oldsax;
12954     ctxt->dict = NULL;
12955     ctxt->attsDefault = NULL;
12956     ctxt->attsSpecial = NULL;
12957     xmlFreeParserCtxt(ctxt);
12958     if (newDoc != NULL) {
12959 	xmlFreeDoc(newDoc);
12960     }
12961 
12962     return(ret);
12963 }
12964 
12965 /**
12966  * xmlParseInNodeContext:
12967  * @node:  the context node
12968  * @data:  the input string
12969  * @datalen:  the input string length in bytes
12970  * @options:  a combination of xmlParserOption
12971  * @lst:  the return value for the set of parsed nodes
12972  *
12973  * Parse a well-balanced chunk of an XML document
12974  * within the context (DTD, namespaces, etc ...) of the given node.
12975  *
12976  * The allowed sequence for the data is a Well Balanced Chunk defined by
12977  * the content production in the XML grammar:
12978  *
12979  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12980  *
12981  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12982  * error code otherwise
12983  */
12984 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12985 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12986                       int options, xmlNodePtr *lst) {
12987 #ifdef SAX2
12988     xmlParserCtxtPtr ctxt;
12989     xmlDocPtr doc = NULL;
12990     xmlNodePtr fake, cur;
12991     int nsnr = 0;
12992 
12993     xmlParserErrors ret = XML_ERR_OK;
12994 
12995     /*
12996      * check all input parameters, grab the document
12997      */
12998     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12999         return(XML_ERR_INTERNAL_ERROR);
13000     switch (node->type) {
13001         case XML_ELEMENT_NODE:
13002         case XML_ATTRIBUTE_NODE:
13003         case XML_TEXT_NODE:
13004         case XML_CDATA_SECTION_NODE:
13005         case XML_ENTITY_REF_NODE:
13006         case XML_PI_NODE:
13007         case XML_COMMENT_NODE:
13008         case XML_DOCUMENT_NODE:
13009         case XML_HTML_DOCUMENT_NODE:
13010 	    break;
13011 	default:
13012 	    return(XML_ERR_INTERNAL_ERROR);
13013 
13014     }
13015     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13016            (node->type != XML_DOCUMENT_NODE) &&
13017 	   (node->type != XML_HTML_DOCUMENT_NODE))
13018 	node = node->parent;
13019     if (node == NULL)
13020 	return(XML_ERR_INTERNAL_ERROR);
13021     if (node->type == XML_ELEMENT_NODE)
13022 	doc = node->doc;
13023     else
13024         doc = (xmlDocPtr) node;
13025     if (doc == NULL)
13026 	return(XML_ERR_INTERNAL_ERROR);
13027 
13028     /*
13029      * allocate a context and set-up everything not related to the
13030      * node position in the tree
13031      */
13032     if (doc->type == XML_DOCUMENT_NODE)
13033 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13034 #ifdef LIBXML_HTML_ENABLED
13035     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13036 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13037         /*
13038          * When parsing in context, it makes no sense to add implied
13039          * elements like html/body/etc...
13040          */
13041         options |= HTML_PARSE_NOIMPLIED;
13042     }
13043 #endif
13044     else
13045         return(XML_ERR_INTERNAL_ERROR);
13046 
13047     if (ctxt == NULL)
13048         return(XML_ERR_NO_MEMORY);
13049 
13050     /*
13051      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13052      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13053      * we must wait until the last moment to free the original one.
13054      */
13055     if (doc->dict != NULL) {
13056         if (ctxt->dict != NULL)
13057 	    xmlDictFree(ctxt->dict);
13058 	ctxt->dict = doc->dict;
13059     } else
13060         options |= XML_PARSE_NODICT;
13061 
13062     if (doc->encoding != NULL) {
13063         xmlCharEncodingHandlerPtr hdlr;
13064 
13065         if (ctxt->encoding != NULL)
13066 	    xmlFree((xmlChar *) ctxt->encoding);
13067         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13068 
13069         hdlr = xmlFindCharEncodingHandler(doc->encoding);
13070         if (hdlr != NULL) {
13071             xmlSwitchToEncoding(ctxt, hdlr);
13072 	} else {
13073             return(XML_ERR_UNSUPPORTED_ENCODING);
13074         }
13075     }
13076 
13077     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13078     xmlDetectSAX2(ctxt);
13079     ctxt->myDoc = doc;
13080 
13081     fake = xmlNewComment(NULL);
13082     if (fake == NULL) {
13083         xmlFreeParserCtxt(ctxt);
13084 	return(XML_ERR_NO_MEMORY);
13085     }
13086     xmlAddChild(node, fake);
13087 
13088     if (node->type == XML_ELEMENT_NODE) {
13089 	nodePush(ctxt, node);
13090 	/*
13091 	 * initialize the SAX2 namespaces stack
13092 	 */
13093 	cur = node;
13094 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13095 	    xmlNsPtr ns = cur->nsDef;
13096 	    const xmlChar *iprefix, *ihref;
13097 
13098 	    while (ns != NULL) {
13099 		if (ctxt->dict) {
13100 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13101 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13102 		} else {
13103 		    iprefix = ns->prefix;
13104 		    ihref = ns->href;
13105 		}
13106 
13107 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13108 		    nsPush(ctxt, iprefix, ihref);
13109 		    nsnr++;
13110 		}
13111 		ns = ns->next;
13112 	    }
13113 	    cur = cur->parent;
13114 	}
13115 	ctxt->instate = XML_PARSER_CONTENT;
13116     }
13117 
13118     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13119 	/*
13120 	 * ID/IDREF registration will be done in xmlValidateElement below
13121 	 */
13122 	ctxt->loadsubset |= XML_SKIP_IDS;
13123     }
13124 
13125 #ifdef LIBXML_HTML_ENABLED
13126     if (doc->type == XML_HTML_DOCUMENT_NODE)
13127         __htmlParseContent(ctxt);
13128     else
13129 #endif
13130 	xmlParseContent(ctxt);
13131 
13132     nsPop(ctxt, nsnr);
13133     if ((RAW == '<') && (NXT(1) == '/')) {
13134 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13135     } else if (RAW != 0) {
13136 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13137     }
13138     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13139 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13140 	ctxt->wellFormed = 0;
13141     }
13142 
13143     if (!ctxt->wellFormed) {
13144         if (ctxt->errNo == 0)
13145 	    ret = XML_ERR_INTERNAL_ERROR;
13146 	else
13147 	    ret = (xmlParserErrors)ctxt->errNo;
13148     } else {
13149         ret = XML_ERR_OK;
13150     }
13151 
13152     /*
13153      * Return the newly created nodeset after unlinking it from
13154      * the pseudo sibling.
13155      */
13156 
13157     cur = fake->next;
13158     fake->next = NULL;
13159     node->last = fake;
13160 
13161     if (cur != NULL) {
13162 	cur->prev = NULL;
13163     }
13164 
13165     *lst = cur;
13166 
13167     while (cur != NULL) {
13168 	cur->parent = NULL;
13169 	cur = cur->next;
13170     }
13171 
13172     xmlUnlinkNode(fake);
13173     xmlFreeNode(fake);
13174 
13175 
13176     if (ret != XML_ERR_OK) {
13177         xmlFreeNodeList(*lst);
13178 	*lst = NULL;
13179     }
13180 
13181     if (doc->dict != NULL)
13182         ctxt->dict = NULL;
13183     xmlFreeParserCtxt(ctxt);
13184 
13185     return(ret);
13186 #else /* !SAX2 */
13187     return(XML_ERR_INTERNAL_ERROR);
13188 #endif
13189 }
13190 
13191 #ifdef LIBXML_SAX1_ENABLED
13192 /**
13193  * xmlParseBalancedChunkMemoryRecover:
13194  * @doc:  the document the chunk pertains to
13195  * @sax:  the SAX handler bloc (possibly NULL)
13196  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13197  * @depth:  Used for loop detection, use 0
13198  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13199  * @lst:  the return value for the set of parsed nodes
13200  * @recover: return nodes even if the data is broken (use 0)
13201  *
13202  *
13203  * Parse a well-balanced chunk of an XML document
13204  * called by the parser
13205  * The allowed sequence for the Well Balanced Chunk is the one defined by
13206  * the content production in the XML grammar:
13207  *
13208  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13209  *
13210  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13211  *    the parser error code otherwise
13212  *
13213  * In case recover is set to 1, the nodelist will not be empty even if
13214  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13215  * some extent.
13216  */
13217 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13218 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13219      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13220      int recover) {
13221     xmlParserCtxtPtr ctxt;
13222     xmlDocPtr newDoc;
13223     xmlSAXHandlerPtr oldsax = NULL;
13224     xmlNodePtr content, newRoot;
13225     int size;
13226     int ret = 0;
13227 
13228     if (depth > 40) {
13229 	return(XML_ERR_ENTITY_LOOP);
13230     }
13231 
13232 
13233     if (lst != NULL)
13234         *lst = NULL;
13235     if (string == NULL)
13236         return(-1);
13237 
13238     size = xmlStrlen(string);
13239 
13240     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13241     if (ctxt == NULL) return(-1);
13242     ctxt->userData = ctxt;
13243     if (sax != NULL) {
13244 	oldsax = ctxt->sax;
13245         ctxt->sax = sax;
13246 	if (user_data != NULL)
13247 	    ctxt->userData = user_data;
13248     }
13249     newDoc = xmlNewDoc(BAD_CAST "1.0");
13250     if (newDoc == NULL) {
13251 	xmlFreeParserCtxt(ctxt);
13252 	return(-1);
13253     }
13254     newDoc->properties = XML_DOC_INTERNAL;
13255     if ((doc != NULL) && (doc->dict != NULL)) {
13256         xmlDictFree(ctxt->dict);
13257 	ctxt->dict = doc->dict;
13258 	xmlDictReference(ctxt->dict);
13259 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13260 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13261 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13262 	ctxt->dictNames = 1;
13263     } else {
13264 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13265     }
13266     if (doc != NULL) {
13267 	newDoc->intSubset = doc->intSubset;
13268 	newDoc->extSubset = doc->extSubset;
13269     }
13270     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13271     if (newRoot == NULL) {
13272 	if (sax != NULL)
13273 	    ctxt->sax = oldsax;
13274 	xmlFreeParserCtxt(ctxt);
13275 	newDoc->intSubset = NULL;
13276 	newDoc->extSubset = NULL;
13277         xmlFreeDoc(newDoc);
13278 	return(-1);
13279     }
13280     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13281     nodePush(ctxt, newRoot);
13282     if (doc == NULL) {
13283 	ctxt->myDoc = newDoc;
13284     } else {
13285 	ctxt->myDoc = newDoc;
13286 	newDoc->children->doc = doc;
13287 	/* Ensure that doc has XML spec namespace */
13288 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13289 	newDoc->oldNs = doc->oldNs;
13290     }
13291     ctxt->instate = XML_PARSER_CONTENT;
13292     ctxt->depth = depth;
13293 
13294     /*
13295      * Doing validity checking on chunk doesn't make sense
13296      */
13297     ctxt->validate = 0;
13298     ctxt->loadsubset = 0;
13299     xmlDetectSAX2(ctxt);
13300 
13301     if ( doc != NULL ){
13302         content = doc->children;
13303         doc->children = NULL;
13304         xmlParseContent(ctxt);
13305         doc->children = content;
13306     }
13307     else {
13308         xmlParseContent(ctxt);
13309     }
13310     if ((RAW == '<') && (NXT(1) == '/')) {
13311 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13312     } else if (RAW != 0) {
13313 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13314     }
13315     if (ctxt->node != newDoc->children) {
13316 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13317     }
13318 
13319     if (!ctxt->wellFormed) {
13320         if (ctxt->errNo == 0)
13321 	    ret = 1;
13322 	else
13323 	    ret = ctxt->errNo;
13324     } else {
13325       ret = 0;
13326     }
13327 
13328     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13329 	xmlNodePtr cur;
13330 
13331 	/*
13332 	 * Return the newly created nodeset after unlinking it from
13333 	 * they pseudo parent.
13334 	 */
13335 	cur = newDoc->children->children;
13336 	*lst = cur;
13337 	while (cur != NULL) {
13338 	    xmlSetTreeDoc(cur, doc);
13339 	    cur->parent = NULL;
13340 	    cur = cur->next;
13341 	}
13342 	newDoc->children->children = NULL;
13343     }
13344 
13345     if (sax != NULL)
13346 	ctxt->sax = oldsax;
13347     xmlFreeParserCtxt(ctxt);
13348     newDoc->intSubset = NULL;
13349     newDoc->extSubset = NULL;
13350     newDoc->oldNs = NULL;
13351     xmlFreeDoc(newDoc);
13352 
13353     return(ret);
13354 }
13355 
13356 /**
13357  * xmlSAXParseEntity:
13358  * @sax:  the SAX handler block
13359  * @filename:  the filename
13360  *
13361  * parse an XML external entity out of context and build a tree.
13362  * It use the given SAX function block to handle the parsing callback.
13363  * If sax is NULL, fallback to the default DOM tree building routines.
13364  *
13365  * [78] extParsedEnt ::= TextDecl? content
13366  *
13367  * This correspond to a "Well Balanced" chunk
13368  *
13369  * Returns the resulting document tree
13370  */
13371 
13372 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13373 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13374     xmlDocPtr ret;
13375     xmlParserCtxtPtr ctxt;
13376 
13377     ctxt = xmlCreateFileParserCtxt(filename);
13378     if (ctxt == NULL) {
13379 	return(NULL);
13380     }
13381     if (sax != NULL) {
13382 	if (ctxt->sax != NULL)
13383 	    xmlFree(ctxt->sax);
13384         ctxt->sax = sax;
13385         ctxt->userData = NULL;
13386     }
13387 
13388     xmlParseExtParsedEnt(ctxt);
13389 
13390     if (ctxt->wellFormed)
13391 	ret = ctxt->myDoc;
13392     else {
13393         ret = NULL;
13394         xmlFreeDoc(ctxt->myDoc);
13395         ctxt->myDoc = NULL;
13396     }
13397     if (sax != NULL)
13398         ctxt->sax = NULL;
13399     xmlFreeParserCtxt(ctxt);
13400 
13401     return(ret);
13402 }
13403 
13404 /**
13405  * xmlParseEntity:
13406  * @filename:  the filename
13407  *
13408  * parse an XML external entity out of context and build a tree.
13409  *
13410  * [78] extParsedEnt ::= TextDecl? content
13411  *
13412  * This correspond to a "Well Balanced" chunk
13413  *
13414  * Returns the resulting document tree
13415  */
13416 
13417 xmlDocPtr
xmlParseEntity(const char * filename)13418 xmlParseEntity(const char *filename) {
13419     return(xmlSAXParseEntity(NULL, filename));
13420 }
13421 #endif /* LIBXML_SAX1_ENABLED */
13422 
13423 /**
13424  * xmlCreateEntityParserCtxtInternal:
13425  * @URL:  the entity URL
13426  * @ID:  the entity PUBLIC ID
13427  * @base:  a possible base for the target URI
13428  * @pctx:  parser context used to set options on new context
13429  *
13430  * Create a parser context for an external entity
13431  * Automatic support for ZLIB/Compress compressed document is provided
13432  * by default if found at compile-time.
13433  *
13434  * Returns the new parser context or NULL
13435  */
13436 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13437 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13438 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13439     xmlParserCtxtPtr ctxt;
13440     xmlParserInputPtr inputStream;
13441     char *directory = NULL;
13442     xmlChar *uri;
13443 
13444     ctxt = xmlNewParserCtxt();
13445     if (ctxt == NULL) {
13446 	return(NULL);
13447     }
13448 
13449     if (pctx != NULL) {
13450         ctxt->options = pctx->options;
13451         ctxt->_private = pctx->_private;
13452     }
13453 
13454     uri = xmlBuildURI(URL, base);
13455 
13456     if (uri == NULL) {
13457 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13458 	if (inputStream == NULL) {
13459 	    xmlFreeParserCtxt(ctxt);
13460 	    return(NULL);
13461 	}
13462 
13463 	inputPush(ctxt, inputStream);
13464 
13465 	if ((ctxt->directory == NULL) && (directory == NULL))
13466 	    directory = xmlParserGetDirectory((char *)URL);
13467 	if ((ctxt->directory == NULL) && (directory != NULL))
13468 	    ctxt->directory = directory;
13469     } else {
13470 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13471 	if (inputStream == NULL) {
13472 	    xmlFree(uri);
13473 	    xmlFreeParserCtxt(ctxt);
13474 	    return(NULL);
13475 	}
13476 
13477 	inputPush(ctxt, inputStream);
13478 
13479 	if ((ctxt->directory == NULL) && (directory == NULL))
13480 	    directory = xmlParserGetDirectory((char *)uri);
13481 	if ((ctxt->directory == NULL) && (directory != NULL))
13482 	    ctxt->directory = directory;
13483 	xmlFree(uri);
13484     }
13485     return(ctxt);
13486 }
13487 
13488 /**
13489  * xmlCreateEntityParserCtxt:
13490  * @URL:  the entity URL
13491  * @ID:  the entity PUBLIC ID
13492  * @base:  a possible base for the target URI
13493  *
13494  * Create a parser context for an external entity
13495  * Automatic support for ZLIB/Compress compressed document is provided
13496  * by default if found at compile-time.
13497  *
13498  * Returns the new parser context or NULL
13499  */
13500 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13501 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13502 	                  const xmlChar *base) {
13503     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13504 
13505 }
13506 
13507 /************************************************************************
13508  *									*
13509  *		Front ends when parsing from a file			*
13510  *									*
13511  ************************************************************************/
13512 
13513 /**
13514  * xmlCreateURLParserCtxt:
13515  * @filename:  the filename or URL
13516  * @options:  a combination of xmlParserOption
13517  *
13518  * Create a parser context for a file or URL content.
13519  * Automatic support for ZLIB/Compress compressed document is provided
13520  * by default if found at compile-time and for file accesses
13521  *
13522  * Returns the new parser context or NULL
13523  */
13524 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13525 xmlCreateURLParserCtxt(const char *filename, int options)
13526 {
13527     xmlParserCtxtPtr ctxt;
13528     xmlParserInputPtr inputStream;
13529     char *directory = NULL;
13530 
13531     ctxt = xmlNewParserCtxt();
13532     if (ctxt == NULL) {
13533 	xmlErrMemory(NULL, "cannot allocate parser context");
13534 	return(NULL);
13535     }
13536 
13537     if (options)
13538 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13539     ctxt->linenumbers = 1;
13540 
13541     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13542     if (inputStream == NULL) {
13543 	xmlFreeParserCtxt(ctxt);
13544 	return(NULL);
13545     }
13546 
13547     inputPush(ctxt, inputStream);
13548     if ((ctxt->directory == NULL) && (directory == NULL))
13549         directory = xmlParserGetDirectory(filename);
13550     if ((ctxt->directory == NULL) && (directory != NULL))
13551         ctxt->directory = directory;
13552 
13553     return(ctxt);
13554 }
13555 
13556 /**
13557  * xmlCreateFileParserCtxt:
13558  * @filename:  the filename
13559  *
13560  * Create a parser context for a file content.
13561  * Automatic support for ZLIB/Compress compressed document is provided
13562  * by default if found at compile-time.
13563  *
13564  * Returns the new parser context or NULL
13565  */
13566 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13567 xmlCreateFileParserCtxt(const char *filename)
13568 {
13569     return(xmlCreateURLParserCtxt(filename, 0));
13570 }
13571 
13572 #ifdef LIBXML_SAX1_ENABLED
13573 /**
13574  * xmlSAXParseFileWithData:
13575  * @sax:  the SAX handler block
13576  * @filename:  the filename
13577  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13578  *             documents
13579  * @data:  the userdata
13580  *
13581  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13582  * compressed document is provided by default if found at compile-time.
13583  * It use the given SAX function block to handle the parsing callback.
13584  * If sax is NULL, fallback to the default DOM tree building routines.
13585  *
13586  * User data (void *) is stored within the parser context in the
13587  * context's _private member, so it is available nearly everywhere in libxml
13588  *
13589  * Returns the resulting document tree
13590  */
13591 
13592 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13593 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13594                         int recovery, void *data) {
13595     xmlDocPtr ret;
13596     xmlParserCtxtPtr ctxt;
13597 
13598     xmlInitParser();
13599 
13600     ctxt = xmlCreateFileParserCtxt(filename);
13601     if (ctxt == NULL) {
13602 	return(NULL);
13603     }
13604     if (sax != NULL) {
13605 	if (ctxt->sax != NULL)
13606 	    xmlFree(ctxt->sax);
13607         ctxt->sax = sax;
13608     }
13609     xmlDetectSAX2(ctxt);
13610     if (data!=NULL) {
13611 	ctxt->_private = data;
13612     }
13613 
13614     if (ctxt->directory == NULL)
13615         ctxt->directory = xmlParserGetDirectory(filename);
13616 
13617     ctxt->recovery = recovery;
13618 
13619     xmlParseDocument(ctxt);
13620 
13621     if ((ctxt->wellFormed) || recovery) {
13622         ret = ctxt->myDoc;
13623 	if (ret != NULL) {
13624 	    if (ctxt->input->buf->compressed > 0)
13625 		ret->compression = 9;
13626 	    else
13627 		ret->compression = ctxt->input->buf->compressed;
13628 	}
13629     }
13630     else {
13631        ret = NULL;
13632        xmlFreeDoc(ctxt->myDoc);
13633        ctxt->myDoc = NULL;
13634     }
13635     if (sax != NULL)
13636         ctxt->sax = NULL;
13637     xmlFreeParserCtxt(ctxt);
13638 
13639     return(ret);
13640 }
13641 
13642 /**
13643  * xmlSAXParseFile:
13644  * @sax:  the SAX handler block
13645  * @filename:  the filename
13646  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13647  *             documents
13648  *
13649  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13650  * compressed document is provided by default if found at compile-time.
13651  * It use the given SAX function block to handle the parsing callback.
13652  * If sax is NULL, fallback to the default DOM tree building routines.
13653  *
13654  * Returns the resulting document tree
13655  */
13656 
13657 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13658 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13659                           int recovery) {
13660     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13661 }
13662 
13663 /**
13664  * xmlRecoverDoc:
13665  * @cur:  a pointer to an array of xmlChar
13666  *
13667  * parse an XML in-memory document and build a tree.
13668  * In the case the document is not Well Formed, a attempt to build a
13669  * tree is tried anyway
13670  *
13671  * Returns the resulting document tree or NULL in case of failure
13672  */
13673 
13674 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13675 xmlRecoverDoc(const xmlChar *cur) {
13676     return(xmlSAXParseDoc(NULL, cur, 1));
13677 }
13678 
13679 /**
13680  * xmlParseFile:
13681  * @filename:  the filename
13682  *
13683  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13684  * compressed document is provided by default if found at compile-time.
13685  *
13686  * Returns the resulting document tree if the file was wellformed,
13687  * NULL otherwise.
13688  */
13689 
13690 xmlDocPtr
xmlParseFile(const char * filename)13691 xmlParseFile(const char *filename) {
13692     return(xmlSAXParseFile(NULL, filename, 0));
13693 }
13694 
13695 /**
13696  * xmlRecoverFile:
13697  * @filename:  the filename
13698  *
13699  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13700  * compressed document is provided by default if found at compile-time.
13701  * In the case the document is not Well Formed, it attempts to build
13702  * a tree anyway
13703  *
13704  * Returns the resulting document tree or NULL in case of failure
13705  */
13706 
13707 xmlDocPtr
xmlRecoverFile(const char * filename)13708 xmlRecoverFile(const char *filename) {
13709     return(xmlSAXParseFile(NULL, filename, 1));
13710 }
13711 
13712 
13713 /**
13714  * xmlSetupParserForBuffer:
13715  * @ctxt:  an XML parser context
13716  * @buffer:  a xmlChar * buffer
13717  * @filename:  a file name
13718  *
13719  * Setup the parser context to parse a new buffer; Clears any prior
13720  * contents from the parser context. The buffer parameter must not be
13721  * NULL, but the filename parameter can be
13722  */
13723 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13724 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13725                              const char* filename)
13726 {
13727     xmlParserInputPtr input;
13728 
13729     if ((ctxt == NULL) || (buffer == NULL))
13730         return;
13731 
13732     input = xmlNewInputStream(ctxt);
13733     if (input == NULL) {
13734         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13735         xmlClearParserCtxt(ctxt);
13736         return;
13737     }
13738 
13739     xmlClearParserCtxt(ctxt);
13740     if (filename != NULL)
13741         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13742     input->base = buffer;
13743     input->cur = buffer;
13744     input->end = &buffer[xmlStrlen(buffer)];
13745     inputPush(ctxt, input);
13746 }
13747 
13748 /**
13749  * xmlSAXUserParseFile:
13750  * @sax:  a SAX handler
13751  * @user_data:  The user data returned on SAX callbacks
13752  * @filename:  a file name
13753  *
13754  * parse an XML file and call the given SAX handler routines.
13755  * Automatic support for ZLIB/Compress compressed document is provided
13756  *
13757  * Returns 0 in case of success or a error number otherwise
13758  */
13759 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13760 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13761                     const char *filename) {
13762     int ret = 0;
13763     xmlParserCtxtPtr ctxt;
13764 
13765     ctxt = xmlCreateFileParserCtxt(filename);
13766     if (ctxt == NULL) return -1;
13767     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13768 	xmlFree(ctxt->sax);
13769     ctxt->sax = sax;
13770     xmlDetectSAX2(ctxt);
13771 
13772     if (user_data != NULL)
13773 	ctxt->userData = user_data;
13774 
13775     xmlParseDocument(ctxt);
13776 
13777     if (ctxt->wellFormed)
13778 	ret = 0;
13779     else {
13780         if (ctxt->errNo != 0)
13781 	    ret = ctxt->errNo;
13782 	else
13783 	    ret = -1;
13784     }
13785     if (sax != NULL)
13786 	ctxt->sax = NULL;
13787     if (ctxt->myDoc != NULL) {
13788         xmlFreeDoc(ctxt->myDoc);
13789 	ctxt->myDoc = NULL;
13790     }
13791     xmlFreeParserCtxt(ctxt);
13792 
13793     return ret;
13794 }
13795 #endif /* LIBXML_SAX1_ENABLED */
13796 
13797 /************************************************************************
13798  *									*
13799  * 		Front ends when parsing from memory			*
13800  *									*
13801  ************************************************************************/
13802 
13803 /**
13804  * xmlCreateMemoryParserCtxt:
13805  * @buffer:  a pointer to a char array
13806  * @size:  the size of the array
13807  *
13808  * Create a parser context for an XML in-memory document.
13809  *
13810  * Returns the new parser context or NULL
13811  */
13812 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13813 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13814     xmlParserCtxtPtr ctxt;
13815     xmlParserInputPtr input;
13816     xmlParserInputBufferPtr buf;
13817 
13818     if (buffer == NULL)
13819 	return(NULL);
13820     if (size <= 0)
13821 	return(NULL);
13822 
13823     ctxt = xmlNewParserCtxt();
13824     if (ctxt == NULL)
13825 	return(NULL);
13826 
13827     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13828     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13829     if (buf == NULL) {
13830 	xmlFreeParserCtxt(ctxt);
13831 	return(NULL);
13832     }
13833 
13834     input = xmlNewInputStream(ctxt);
13835     if (input == NULL) {
13836 	xmlFreeParserInputBuffer(buf);
13837 	xmlFreeParserCtxt(ctxt);
13838 	return(NULL);
13839     }
13840 
13841     input->filename = NULL;
13842     input->buf = buf;
13843     input->base = input->buf->buffer->content;
13844     input->cur = input->buf->buffer->content;
13845     input->end = &input->buf->buffer->content[input->buf->buffer->use];
13846 
13847     inputPush(ctxt, input);
13848     return(ctxt);
13849 }
13850 
13851 #ifdef LIBXML_SAX1_ENABLED
13852 /**
13853  * xmlSAXParseMemoryWithData:
13854  * @sax:  the SAX handler block
13855  * @buffer:  an pointer to a char array
13856  * @size:  the size of the array
13857  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13858  *             documents
13859  * @data:  the userdata
13860  *
13861  * parse an XML in-memory block and use the given SAX function block
13862  * to handle the parsing callback. If sax is NULL, fallback to the default
13863  * DOM tree building routines.
13864  *
13865  * User data (void *) is stored within the parser context in the
13866  * context's _private member, so it is available nearly everywhere in libxml
13867  *
13868  * Returns the resulting document tree
13869  */
13870 
13871 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13872 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13873 	          int size, int recovery, void *data) {
13874     xmlDocPtr ret;
13875     xmlParserCtxtPtr ctxt;
13876 
13877     xmlInitParser();
13878 
13879     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13880     if (ctxt == NULL) return(NULL);
13881     if (sax != NULL) {
13882 	if (ctxt->sax != NULL)
13883 	    xmlFree(ctxt->sax);
13884         ctxt->sax = sax;
13885     }
13886     xmlDetectSAX2(ctxt);
13887     if (data!=NULL) {
13888 	ctxt->_private=data;
13889     }
13890 
13891     ctxt->recovery = recovery;
13892 
13893     xmlParseDocument(ctxt);
13894 
13895     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13896     else {
13897        ret = NULL;
13898        xmlFreeDoc(ctxt->myDoc);
13899        ctxt->myDoc = NULL;
13900     }
13901     if (sax != NULL)
13902 	ctxt->sax = NULL;
13903     xmlFreeParserCtxt(ctxt);
13904 
13905     return(ret);
13906 }
13907 
13908 /**
13909  * xmlSAXParseMemory:
13910  * @sax:  the SAX handler block
13911  * @buffer:  an pointer to a char array
13912  * @size:  the size of the array
13913  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13914  *             documents
13915  *
13916  * parse an XML in-memory block and use the given SAX function block
13917  * to handle the parsing callback. If sax is NULL, fallback to the default
13918  * DOM tree building routines.
13919  *
13920  * Returns the resulting document tree
13921  */
13922 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13923 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13924 	          int size, int recovery) {
13925     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13926 }
13927 
13928 /**
13929  * xmlParseMemory:
13930  * @buffer:  an pointer to a char array
13931  * @size:  the size of the array
13932  *
13933  * parse an XML in-memory block and build a tree.
13934  *
13935  * Returns the resulting document tree
13936  */
13937 
xmlParseMemory(const char * buffer,int size)13938 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13939    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13940 }
13941 
13942 /**
13943  * xmlRecoverMemory:
13944  * @buffer:  an pointer to a char array
13945  * @size:  the size of the array
13946  *
13947  * parse an XML in-memory block and build a tree.
13948  * In the case the document is not Well Formed, an attempt to
13949  * build a tree is tried anyway
13950  *
13951  * Returns the resulting document tree or NULL in case of error
13952  */
13953 
xmlRecoverMemory(const char * buffer,int size)13954 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13955    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13956 }
13957 
13958 /**
13959  * xmlSAXUserParseMemory:
13960  * @sax:  a SAX handler
13961  * @user_data:  The user data returned on SAX callbacks
13962  * @buffer:  an in-memory XML document input
13963  * @size:  the length of the XML document in bytes
13964  *
13965  * A better SAX parsing routine.
13966  * parse an XML in-memory buffer and call the given SAX handler routines.
13967  *
13968  * Returns 0 in case of success or a error number otherwise
13969  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13970 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13971 			  const char *buffer, int size) {
13972     int ret = 0;
13973     xmlParserCtxtPtr ctxt;
13974 
13975     xmlInitParser();
13976 
13977     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13978     if (ctxt == NULL) return -1;
13979     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13980         xmlFree(ctxt->sax);
13981     ctxt->sax = sax;
13982     xmlDetectSAX2(ctxt);
13983 
13984     if (user_data != NULL)
13985 	ctxt->userData = user_data;
13986 
13987     xmlParseDocument(ctxt);
13988 
13989     if (ctxt->wellFormed)
13990 	ret = 0;
13991     else {
13992         if (ctxt->errNo != 0)
13993 	    ret = ctxt->errNo;
13994 	else
13995 	    ret = -1;
13996     }
13997     if (sax != NULL)
13998         ctxt->sax = NULL;
13999     if (ctxt->myDoc != NULL) {
14000         xmlFreeDoc(ctxt->myDoc);
14001 	ctxt->myDoc = NULL;
14002     }
14003     xmlFreeParserCtxt(ctxt);
14004 
14005     return ret;
14006 }
14007 #endif /* LIBXML_SAX1_ENABLED */
14008 
14009 /**
14010  * xmlCreateDocParserCtxt:
14011  * @cur:  a pointer to an array of xmlChar
14012  *
14013  * Creates a parser context for an XML in-memory document.
14014  *
14015  * Returns the new parser context or NULL
14016  */
14017 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14018 xmlCreateDocParserCtxt(const xmlChar *cur) {
14019     int len;
14020 
14021     if (cur == NULL)
14022 	return(NULL);
14023     len = xmlStrlen(cur);
14024     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14025 }
14026 
14027 #ifdef LIBXML_SAX1_ENABLED
14028 /**
14029  * xmlSAXParseDoc:
14030  * @sax:  the SAX handler block
14031  * @cur:  a pointer to an array of xmlChar
14032  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14033  *             documents
14034  *
14035  * parse an XML in-memory document and build a tree.
14036  * It use the given SAX function block to handle the parsing callback.
14037  * If sax is NULL, fallback to the default DOM tree building routines.
14038  *
14039  * Returns the resulting document tree
14040  */
14041 
14042 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14043 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14044     xmlDocPtr ret;
14045     xmlParserCtxtPtr ctxt;
14046     xmlSAXHandlerPtr oldsax = NULL;
14047 
14048     if (cur == NULL) return(NULL);
14049 
14050 
14051     ctxt = xmlCreateDocParserCtxt(cur);
14052     if (ctxt == NULL) return(NULL);
14053     if (sax != NULL) {
14054         oldsax = ctxt->sax;
14055         ctxt->sax = sax;
14056         ctxt->userData = NULL;
14057     }
14058     xmlDetectSAX2(ctxt);
14059 
14060     xmlParseDocument(ctxt);
14061     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14062     else {
14063        ret = NULL;
14064        xmlFreeDoc(ctxt->myDoc);
14065        ctxt->myDoc = NULL;
14066     }
14067     if (sax != NULL)
14068 	ctxt->sax = oldsax;
14069     xmlFreeParserCtxt(ctxt);
14070 
14071     return(ret);
14072 }
14073 
14074 /**
14075  * xmlParseDoc:
14076  * @cur:  a pointer to an array of xmlChar
14077  *
14078  * parse an XML in-memory document and build a tree.
14079  *
14080  * Returns the resulting document tree
14081  */
14082 
14083 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14084 xmlParseDoc(const xmlChar *cur) {
14085     return(xmlSAXParseDoc(NULL, cur, 0));
14086 }
14087 #endif /* LIBXML_SAX1_ENABLED */
14088 
14089 #ifdef LIBXML_LEGACY_ENABLED
14090 /************************************************************************
14091  *									*
14092  * 	Specific function to keep track of entities references		*
14093  * 	and used by the XSLT debugger					*
14094  *									*
14095  ************************************************************************/
14096 
14097 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14098 
14099 /**
14100  * xmlAddEntityReference:
14101  * @ent : A valid entity
14102  * @firstNode : A valid first node for children of entity
14103  * @lastNode : A valid last node of children entity
14104  *
14105  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14106  */
14107 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14108 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14109                       xmlNodePtr lastNode)
14110 {
14111     if (xmlEntityRefFunc != NULL) {
14112         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14113     }
14114 }
14115 
14116 
14117 /**
14118  * xmlSetEntityReferenceFunc:
14119  * @func: A valid function
14120  *
14121  * Set the function to call call back when a xml reference has been made
14122  */
14123 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14124 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14125 {
14126     xmlEntityRefFunc = func;
14127 }
14128 #endif /* LIBXML_LEGACY_ENABLED */
14129 
14130 /************************************************************************
14131  *									*
14132  * 				Miscellaneous				*
14133  *									*
14134  ************************************************************************/
14135 
14136 #ifdef LIBXML_XPATH_ENABLED
14137 #include <libxml/xpath.h>
14138 #endif
14139 
14140 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14141 static int xmlParserInitialized = 0;
14142 
14143 /**
14144  * xmlInitParser:
14145  *
14146  * Initialization function for the XML parser.
14147  * This is not reentrant. Call once before processing in case of
14148  * use in multithreaded programs.
14149  */
14150 
14151 void
xmlInitParser(void)14152 xmlInitParser(void) {
14153     if (xmlParserInitialized != 0)
14154 	return;
14155 
14156 #ifdef LIBXML_THREAD_ENABLED
14157     __xmlGlobalInitMutexLock();
14158     if (xmlParserInitialized == 0) {
14159 #endif
14160 	xmlInitThreads();
14161 	xmlInitGlobals();
14162 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14163 	    (xmlGenericError == NULL))
14164 	    initGenericErrorDefaultFunc(NULL);
14165 	xmlInitMemory();
14166 	xmlInitCharEncodingHandlers();
14167 	xmlDefaultSAXHandlerInit();
14168 	xmlRegisterDefaultInputCallbacks();
14169 #ifdef LIBXML_OUTPUT_ENABLED
14170 	xmlRegisterDefaultOutputCallbacks();
14171 #endif /* LIBXML_OUTPUT_ENABLED */
14172 #ifdef LIBXML_HTML_ENABLED
14173 	htmlInitAutoClose();
14174 	htmlDefaultSAXHandlerInit();
14175 #endif
14176 #ifdef LIBXML_XPATH_ENABLED
14177 	xmlXPathInit();
14178 #endif
14179 	xmlParserInitialized = 1;
14180 #ifdef LIBXML_THREAD_ENABLED
14181     }
14182     __xmlGlobalInitMutexUnlock();
14183 #endif
14184 }
14185 
14186 /**
14187  * xmlCleanupParser:
14188  *
14189  * This function name is somewhat misleading. It does not clean up
14190  * parser state, it cleans up memory allocated by the library itself.
14191  * It is a cleanup function for the XML library. It tries to reclaim all
14192  * related global memory allocated for the library processing.
14193  * It doesn't deallocate any document related memory. One should
14194  * call xmlCleanupParser() only when the process has finished using
14195  * the library and all XML/HTML documents built with it.
14196  * See also xmlInitParser() which has the opposite function of preparing
14197  * the library for operations.
14198  *
14199  * WARNING: if your application is multithreaded or has plugin support
14200  *          calling this may crash the application if another thread or
14201  *          a plugin is still using libxml2. It's sometimes very hard to
14202  *          guess if libxml2 is in use in the application, some libraries
14203  *          or plugins may use it without notice. In case of doubt abstain
14204  *          from calling this function or do it just before calling exit()
14205  *          to avoid leak reports from valgrind !
14206  */
14207 
14208 void
xmlCleanupParser(void)14209 xmlCleanupParser(void) {
14210     if (!xmlParserInitialized)
14211 	return;
14212 
14213     xmlCleanupCharEncodingHandlers();
14214 #ifdef LIBXML_CATALOG_ENABLED
14215     xmlCatalogCleanup();
14216 #endif
14217     xmlDictCleanup();
14218     xmlCleanupInputCallbacks();
14219 #ifdef LIBXML_OUTPUT_ENABLED
14220     xmlCleanupOutputCallbacks();
14221 #endif
14222 #ifdef LIBXML_SCHEMAS_ENABLED
14223     xmlSchemaCleanupTypes();
14224     xmlRelaxNGCleanupTypes();
14225 #endif
14226     xmlCleanupGlobals();
14227     xmlResetLastError();
14228     xmlCleanupThreads(); /* must be last if called not from the main thread */
14229     xmlCleanupMemory();
14230     xmlParserInitialized = 0;
14231 }
14232 
14233 /************************************************************************
14234  *									*
14235  *	New set (2.6.0) of simpler and more flexible APIs		*
14236  *									*
14237  ************************************************************************/
14238 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded. The expansion is a bare "if" statement that already
 * ends with a semicolon, so avoid using it as the unbraced body of an
 * if/else.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) && ((dict == NULL) ||		\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14250 
14251 /**
14252  * xmlCtxtReset:
14253  * @ctxt: an XML parser context
14254  *
14255  * Reset a parser context
14256  */
14257 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14258 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14259 {
14260     xmlParserInputPtr input;
14261     xmlDictPtr dict;
14262 
14263     if (ctxt == NULL)
14264         return;
14265 
14266     dict = ctxt->dict;
14267 
14268     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14269         xmlFreeInputStream(input);
14270     }
14271     ctxt->inputNr = 0;
14272     ctxt->input = NULL;
14273 
14274     ctxt->spaceNr = 0;
14275     if (ctxt->spaceTab != NULL) {
14276 	ctxt->spaceTab[0] = -1;
14277 	ctxt->space = &ctxt->spaceTab[0];
14278     } else {
14279         ctxt->space = NULL;
14280     }
14281 
14282 
14283     ctxt->nodeNr = 0;
14284     ctxt->node = NULL;
14285 
14286     ctxt->nameNr = 0;
14287     ctxt->name = NULL;
14288 
14289     DICT_FREE(ctxt->version);
14290     ctxt->version = NULL;
14291     DICT_FREE(ctxt->encoding);
14292     ctxt->encoding = NULL;
14293     DICT_FREE(ctxt->directory);
14294     ctxt->directory = NULL;
14295     DICT_FREE(ctxt->extSubURI);
14296     ctxt->extSubURI = NULL;
14297     DICT_FREE(ctxt->extSubSystem);
14298     ctxt->extSubSystem = NULL;
14299     if (ctxt->myDoc != NULL)
14300         xmlFreeDoc(ctxt->myDoc);
14301     ctxt->myDoc = NULL;
14302 
14303     ctxt->standalone = -1;
14304     ctxt->hasExternalSubset = 0;
14305     ctxt->hasPErefs = 0;
14306     ctxt->html = 0;
14307     ctxt->external = 0;
14308     ctxt->instate = XML_PARSER_START;
14309     ctxt->token = 0;
14310 
14311     ctxt->wellFormed = 1;
14312     ctxt->nsWellFormed = 1;
14313     ctxt->disableSAX = 0;
14314     ctxt->valid = 1;
14315 #if 0
14316     ctxt->vctxt.userData = ctxt;
14317     ctxt->vctxt.error = xmlParserValidityError;
14318     ctxt->vctxt.warning = xmlParserValidityWarning;
14319 #endif
14320     ctxt->record_info = 0;
14321     ctxt->nbChars = 0;
14322     ctxt->checkIndex = 0;
14323     ctxt->inSubset = 0;
14324     ctxt->errNo = XML_ERR_OK;
14325     ctxt->depth = 0;
14326     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14327     ctxt->catalogs = NULL;
14328     ctxt->nbentities = 0;
14329     ctxt->sizeentities = 0;
14330     xmlInitNodeInfoSeq(&ctxt->node_seq);
14331 
14332     if (ctxt->attsDefault != NULL) {
14333         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14334         ctxt->attsDefault = NULL;
14335     }
14336     if (ctxt->attsSpecial != NULL) {
14337         xmlHashFree(ctxt->attsSpecial, NULL);
14338         ctxt->attsSpecial = NULL;
14339     }
14340 
14341 #ifdef LIBXML_CATALOG_ENABLED
14342     if (ctxt->catalogs != NULL)
14343 	xmlCatalogFreeLocal(ctxt->catalogs);
14344 #endif
14345     if (ctxt->lastError.code != XML_ERR_OK)
14346         xmlResetError(&ctxt->lastError);
14347 }
14348 
14349 /**
14350  * xmlCtxtResetPush:
14351  * @ctxt: an XML parser context
14352  * @chunk:  a pointer to an array of chars
14353  * @size:  number of chars in the array
14354  * @filename:  an optional file name or URI
14355  * @encoding:  the document encoding, or NULL
14356  *
14357  * Reset a push parser context
14358  *
14359  * Returns 0 in case of success and 1 in case of error
14360  */
14361 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14362 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14363                  int size, const char *filename, const char *encoding)
14364 {
14365     xmlParserInputPtr inputStream;
14366     xmlParserInputBufferPtr buf;
14367     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14368 
14369     if (ctxt == NULL)
14370         return(1);
14371 
14372     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14373         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14374 
14375     buf = xmlAllocParserInputBuffer(enc);
14376     if (buf == NULL)
14377         return(1);
14378 
14379     if (ctxt == NULL) {
14380         xmlFreeParserInputBuffer(buf);
14381         return(1);
14382     }
14383 
14384     xmlCtxtReset(ctxt);
14385 
14386     if (ctxt->pushTab == NULL) {
14387         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14388 	                                    sizeof(xmlChar *));
14389         if (ctxt->pushTab == NULL) {
14390 	    xmlErrMemory(ctxt, NULL);
14391             xmlFreeParserInputBuffer(buf);
14392             return(1);
14393         }
14394     }
14395 
14396     if (filename == NULL) {
14397         ctxt->directory = NULL;
14398     } else {
14399         ctxt->directory = xmlParserGetDirectory(filename);
14400     }
14401 
14402     inputStream = xmlNewInputStream(ctxt);
14403     if (inputStream == NULL) {
14404         xmlFreeParserInputBuffer(buf);
14405         return(1);
14406     }
14407 
14408     if (filename == NULL)
14409         inputStream->filename = NULL;
14410     else
14411         inputStream->filename = (char *)
14412             xmlCanonicPath((const xmlChar *) filename);
14413     inputStream->buf = buf;
14414     inputStream->base = inputStream->buf->buffer->content;
14415     inputStream->cur = inputStream->buf->buffer->content;
14416     inputStream->end =
14417         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14418 
14419     inputPush(ctxt, inputStream);
14420 
14421     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14422         (ctxt->input->buf != NULL)) {
14423         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14424         int cur = ctxt->input->cur - ctxt->input->base;
14425 
14426         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14427 
14428         ctxt->input->base = ctxt->input->buf->buffer->content + base;
14429         ctxt->input->cur = ctxt->input->base + cur;
14430         ctxt->input->end =
14431             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14432                                                use];
14433 #ifdef DEBUG_PUSH
14434         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14435 #endif
14436     }
14437 
14438     if (encoding != NULL) {
14439         xmlCharEncodingHandlerPtr hdlr;
14440 
14441         if (ctxt->encoding != NULL)
14442 	    xmlFree((xmlChar *) ctxt->encoding);
14443         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14444 
14445         hdlr = xmlFindCharEncodingHandler(encoding);
14446         if (hdlr != NULL) {
14447             xmlSwitchToEncoding(ctxt, hdlr);
14448 	} else {
14449 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14450 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14451         }
14452     } else if (enc != XML_CHAR_ENCODING_NONE) {
14453         xmlSwitchEncoding(ctxt, enc);
14454     }
14455 
14456     return(0);
14457 }
14458 
14459 
14460 /**
14461  * xmlCtxtUseOptionsInternal:
14462  * @ctxt: an XML parser context
14463  * @options:  a combination of xmlParserOption
14464  * @encoding:  the user provided encoding to use
14465  *
14466  * Applies the options to the parser context
14467  *
14468  * Returns 0 in case of success, the set of unknown or unimplemented options
14469  *         in case of error.
14470  */
14471 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14472 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14473 {
14474     if (ctxt == NULL)
14475         return(-1);
14476     if (encoding != NULL) {
14477         if (ctxt->encoding != NULL)
14478 	    xmlFree((xmlChar *) ctxt->encoding);
14479         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14480     }
14481     if (options & XML_PARSE_RECOVER) {
14482         ctxt->recovery = 1;
14483         options -= XML_PARSE_RECOVER;
14484 	ctxt->options |= XML_PARSE_RECOVER;
14485     } else
14486         ctxt->recovery = 0;
14487     if (options & XML_PARSE_DTDLOAD) {
14488         ctxt->loadsubset = XML_DETECT_IDS;
14489         options -= XML_PARSE_DTDLOAD;
14490 	ctxt->options |= XML_PARSE_DTDLOAD;
14491     } else
14492         ctxt->loadsubset = 0;
14493     if (options & XML_PARSE_DTDATTR) {
14494         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14495         options -= XML_PARSE_DTDATTR;
14496 	ctxt->options |= XML_PARSE_DTDATTR;
14497     }
14498     if (options & XML_PARSE_NOENT) {
14499         ctxt->replaceEntities = 1;
14500         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14501         options -= XML_PARSE_NOENT;
14502 	ctxt->options |= XML_PARSE_NOENT;
14503     } else
14504         ctxt->replaceEntities = 0;
14505     if (options & XML_PARSE_PEDANTIC) {
14506         ctxt->pedantic = 1;
14507         options -= XML_PARSE_PEDANTIC;
14508 	ctxt->options |= XML_PARSE_PEDANTIC;
14509     } else
14510         ctxt->pedantic = 0;
14511     if (options & XML_PARSE_NOBLANKS) {
14512         ctxt->keepBlanks = 0;
14513         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14514         options -= XML_PARSE_NOBLANKS;
14515 	ctxt->options |= XML_PARSE_NOBLANKS;
14516     } else
14517         ctxt->keepBlanks = 1;
14518     if (options & XML_PARSE_DTDVALID) {
14519         ctxt->validate = 1;
14520         if (options & XML_PARSE_NOWARNING)
14521             ctxt->vctxt.warning = NULL;
14522         if (options & XML_PARSE_NOERROR)
14523             ctxt->vctxt.error = NULL;
14524         options -= XML_PARSE_DTDVALID;
14525 	ctxt->options |= XML_PARSE_DTDVALID;
14526     } else
14527         ctxt->validate = 0;
14528     if (options & XML_PARSE_NOWARNING) {
14529         ctxt->sax->warning = NULL;
14530         options -= XML_PARSE_NOWARNING;
14531     }
14532     if (options & XML_PARSE_NOERROR) {
14533         ctxt->sax->error = NULL;
14534         ctxt->sax->fatalError = NULL;
14535         options -= XML_PARSE_NOERROR;
14536     }
14537 #ifdef LIBXML_SAX1_ENABLED
14538     if (options & XML_PARSE_SAX1) {
14539         ctxt->sax->startElement = xmlSAX2StartElement;
14540         ctxt->sax->endElement = xmlSAX2EndElement;
14541         ctxt->sax->startElementNs = NULL;
14542         ctxt->sax->endElementNs = NULL;
14543         ctxt->sax->initialized = 1;
14544         options -= XML_PARSE_SAX1;
14545 	ctxt->options |= XML_PARSE_SAX1;
14546     }
14547 #endif /* LIBXML_SAX1_ENABLED */
14548     if (options & XML_PARSE_NODICT) {
14549         ctxt->dictNames = 0;
14550         options -= XML_PARSE_NODICT;
14551 	ctxt->options |= XML_PARSE_NODICT;
14552     } else {
14553         ctxt->dictNames = 1;
14554     }
14555     if (options & XML_PARSE_NOCDATA) {
14556         ctxt->sax->cdataBlock = NULL;
14557         options -= XML_PARSE_NOCDATA;
14558 	ctxt->options |= XML_PARSE_NOCDATA;
14559     }
14560     if (options & XML_PARSE_NSCLEAN) {
14561 	ctxt->options |= XML_PARSE_NSCLEAN;
14562         options -= XML_PARSE_NSCLEAN;
14563     }
14564     if (options & XML_PARSE_NONET) {
14565 	ctxt->options |= XML_PARSE_NONET;
14566         options -= XML_PARSE_NONET;
14567     }
14568     if (options & XML_PARSE_COMPACT) {
14569 	ctxt->options |= XML_PARSE_COMPACT;
14570         options -= XML_PARSE_COMPACT;
14571     }
14572     if (options & XML_PARSE_OLD10) {
14573 	ctxt->options |= XML_PARSE_OLD10;
14574         options -= XML_PARSE_OLD10;
14575     }
14576     if (options & XML_PARSE_NOBASEFIX) {
14577 	ctxt->options |= XML_PARSE_NOBASEFIX;
14578         options -= XML_PARSE_NOBASEFIX;
14579     }
14580     if (options & XML_PARSE_HUGE) {
14581 	ctxt->options |= XML_PARSE_HUGE;
14582         options -= XML_PARSE_HUGE;
14583     }
14584     if (options & XML_PARSE_OLDSAX) {
14585 	ctxt->options |= XML_PARSE_OLDSAX;
14586         options -= XML_PARSE_OLDSAX;
14587     }
14588     if (options & XML_PARSE_IGNORE_ENC) {
14589 	ctxt->options |= XML_PARSE_IGNORE_ENC;
14590         options -= XML_PARSE_IGNORE_ENC;
14591     }
14592     ctxt->linenumbers = 1;
14593     return (options);
14594 }
14595 
14596 /**
14597  * xmlCtxtUseOptions:
14598  * @ctxt: an XML parser context
14599  * @options:  a combination of xmlParserOption
14600  *
14601  * Applies the options to the parser context
14602  *
14603  * Returns 0 in case of success, the set of unknown or unimplemented options
14604  *         in case of error.
14605  */
14606 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14607 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14608 {
14609    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14610 }
14611 
14612 /**
14613  * xmlDoRead:
14614  * @ctxt:  an XML parser context
14615  * @URL:  the base URL to use for the document
14616  * @encoding:  the document encoding, or NULL
14617  * @options:  a combination of xmlParserOption
14618  * @reuse:  keep the context for reuse
14619  *
14620  * Common front-end for the xmlRead functions
14621  *
14622  * Returns the resulting document tree or NULL
14623  */
14624 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14625 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14626           int options, int reuse)
14627 {
14628     xmlDocPtr ret;
14629 
14630     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14631     if (encoding != NULL) {
14632         xmlCharEncodingHandlerPtr hdlr;
14633 
14634 	hdlr = xmlFindCharEncodingHandler(encoding);
14635 	if (hdlr != NULL)
14636 	    xmlSwitchToEncoding(ctxt, hdlr);
14637     }
14638     if ((URL != NULL) && (ctxt->input != NULL) &&
14639         (ctxt->input->filename == NULL))
14640         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14641     xmlParseDocument(ctxt);
14642     if ((ctxt->wellFormed) || ctxt->recovery)
14643         ret = ctxt->myDoc;
14644     else {
14645         ret = NULL;
14646 	if (ctxt->myDoc != NULL) {
14647 	    xmlFreeDoc(ctxt->myDoc);
14648 	}
14649     }
14650     ctxt->myDoc = NULL;
14651     if (!reuse) {
14652 	xmlFreeParserCtxt(ctxt);
14653     }
14654 
14655     return (ret);
14656 }
14657 
14658 /**
14659  * xmlReadDoc:
14660  * @cur:  a pointer to a zero terminated string
14661  * @URL:  the base URL to use for the document
14662  * @encoding:  the document encoding, or NULL
14663  * @options:  a combination of xmlParserOption
14664  *
14665  * parse an XML in-memory document and build a tree.
14666  *
14667  * Returns the resulting document tree
14668  */
14669 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14670 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14671 {
14672     xmlParserCtxtPtr ctxt;
14673 
14674     if (cur == NULL)
14675         return (NULL);
14676 
14677     ctxt = xmlCreateDocParserCtxt(cur);
14678     if (ctxt == NULL)
14679         return (NULL);
14680     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14681 }
14682 
14683 /**
14684  * xmlReadFile:
14685  * @filename:  a file or URL
14686  * @encoding:  the document encoding, or NULL
14687  * @options:  a combination of xmlParserOption
14688  *
14689  * parse an XML file from the filesystem or the network.
14690  *
14691  * Returns the resulting document tree
14692  */
14693 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14694 xmlReadFile(const char *filename, const char *encoding, int options)
14695 {
14696     xmlParserCtxtPtr ctxt;
14697 
14698     ctxt = xmlCreateURLParserCtxt(filename, options);
14699     if (ctxt == NULL)
14700         return (NULL);
14701     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14702 }
14703 
14704 /**
14705  * xmlReadMemory:
14706  * @buffer:  a pointer to a char array
14707  * @size:  the size of the array
14708  * @URL:  the base URL to use for the document
14709  * @encoding:  the document encoding, or NULL
14710  * @options:  a combination of xmlParserOption
14711  *
14712  * parse an XML in-memory document and build a tree.
14713  *
14714  * Returns the resulting document tree
14715  */
14716 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14717 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14718 {
14719     xmlParserCtxtPtr ctxt;
14720 
14721     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14722     if (ctxt == NULL)
14723         return (NULL);
14724     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14725 }
14726 
14727 /**
14728  * xmlReadFd:
14729  * @fd:  an open file descriptor
14730  * @URL:  the base URL to use for the document
14731  * @encoding:  the document encoding, or NULL
14732  * @options:  a combination of xmlParserOption
14733  *
14734  * parse an XML from a file descriptor and build a tree.
14735  * NOTE that the file descriptor will not be closed when the
14736  *      reader is closed or reset.
14737  *
14738  * Returns the resulting document tree
14739  */
14740 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14741 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14742 {
14743     xmlParserCtxtPtr ctxt;
14744     xmlParserInputBufferPtr input;
14745     xmlParserInputPtr stream;
14746 
14747     if (fd < 0)
14748         return (NULL);
14749 
14750     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14751     if (input == NULL)
14752         return (NULL);
14753     input->closecallback = NULL;
14754     ctxt = xmlNewParserCtxt();
14755     if (ctxt == NULL) {
14756         xmlFreeParserInputBuffer(input);
14757         return (NULL);
14758     }
14759     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14760     if (stream == NULL) {
14761         xmlFreeParserInputBuffer(input);
14762 	xmlFreeParserCtxt(ctxt);
14763         return (NULL);
14764     }
14765     inputPush(ctxt, stream);
14766     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14767 }
14768 
14769 /**
14770  * xmlReadIO:
14771  * @ioread:  an I/O read function
14772  * @ioclose:  an I/O close function
14773  * @ioctx:  an I/O handler
14774  * @URL:  the base URL to use for the document
14775  * @encoding:  the document encoding, or NULL
14776  * @options:  a combination of xmlParserOption
14777  *
14778  * parse an XML document from I/O functions and source and build a tree.
14779  *
14780  * Returns the resulting document tree
14781  */
14782 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14783 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14784           void *ioctx, const char *URL, const char *encoding, int options)
14785 {
14786     xmlParserCtxtPtr ctxt;
14787     xmlParserInputBufferPtr input;
14788     xmlParserInputPtr stream;
14789 
14790     if (ioread == NULL)
14791         return (NULL);
14792 
14793     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14794                                          XML_CHAR_ENCODING_NONE);
14795     if (input == NULL)
14796         return (NULL);
14797     ctxt = xmlNewParserCtxt();
14798     if (ctxt == NULL) {
14799         xmlFreeParserInputBuffer(input);
14800         return (NULL);
14801     }
14802     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14803     if (stream == NULL) {
14804         xmlFreeParserInputBuffer(input);
14805 	xmlFreeParserCtxt(ctxt);
14806         return (NULL);
14807     }
14808     inputPush(ctxt, stream);
14809     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14810 }
14811 
14812 /**
14813  * xmlCtxtReadDoc:
14814  * @ctxt:  an XML parser context
14815  * @cur:  a pointer to a zero terminated string
14816  * @URL:  the base URL to use for the document
14817  * @encoding:  the document encoding, or NULL
14818  * @options:  a combination of xmlParserOption
14819  *
14820  * parse an XML in-memory document and build a tree.
14821  * This reuses the existing @ctxt parser context
14822  *
14823  * Returns the resulting document tree
14824  */
14825 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14826 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14827                const char *URL, const char *encoding, int options)
14828 {
14829     xmlParserInputPtr stream;
14830 
14831     if (cur == NULL)
14832         return (NULL);
14833     if (ctxt == NULL)
14834         return (NULL);
14835 
14836     xmlCtxtReset(ctxt);
14837 
14838     stream = xmlNewStringInputStream(ctxt, cur);
14839     if (stream == NULL) {
14840         return (NULL);
14841     }
14842     inputPush(ctxt, stream);
14843     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14844 }
14845 
14846 /**
14847  * xmlCtxtReadFile:
14848  * @ctxt:  an XML parser context
14849  * @filename:  a file or URL
14850  * @encoding:  the document encoding, or NULL
14851  * @options:  a combination of xmlParserOption
14852  *
14853  * parse an XML file from the filesystem or the network.
14854  * This reuses the existing @ctxt parser context
14855  *
14856  * Returns the resulting document tree
14857  */
14858 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14859 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14860                 const char *encoding, int options)
14861 {
14862     xmlParserInputPtr stream;
14863 
14864     if (filename == NULL)
14865         return (NULL);
14866     if (ctxt == NULL)
14867         return (NULL);
14868 
14869     xmlCtxtReset(ctxt);
14870 
14871     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14872     if (stream == NULL) {
14873         return (NULL);
14874     }
14875     inputPush(ctxt, stream);
14876     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14877 }
14878 
14879 /**
14880  * xmlCtxtReadMemory:
14881  * @ctxt:  an XML parser context
14882  * @buffer:  a pointer to a char array
14883  * @size:  the size of the array
14884  * @URL:  the base URL to use for the document
14885  * @encoding:  the document encoding, or NULL
14886  * @options:  a combination of xmlParserOption
14887  *
14888  * parse an XML in-memory document and build a tree.
14889  * This reuses the existing @ctxt parser context
14890  *
14891  * Returns the resulting document tree
14892  */
14893 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14894 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14895                   const char *URL, const char *encoding, int options)
14896 {
14897     xmlParserInputBufferPtr input;
14898     xmlParserInputPtr stream;
14899 
14900     if (ctxt == NULL)
14901         return (NULL);
14902     if (buffer == NULL)
14903         return (NULL);
14904 
14905     xmlCtxtReset(ctxt);
14906 
14907     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14908     if (input == NULL) {
14909 	return(NULL);
14910     }
14911 
14912     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14913     if (stream == NULL) {
14914 	xmlFreeParserInputBuffer(input);
14915 	return(NULL);
14916     }
14917 
14918     inputPush(ctxt, stream);
14919     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14920 }
14921 
14922 /**
14923  * xmlCtxtReadFd:
14924  * @ctxt:  an XML parser context
14925  * @fd:  an open file descriptor
14926  * @URL:  the base URL to use for the document
14927  * @encoding:  the document encoding, or NULL
14928  * @options:  a combination of xmlParserOption
14929  *
14930  * parse an XML from a file descriptor and build a tree.
14931  * This reuses the existing @ctxt parser context
14932  * NOTE that the file descriptor will not be closed when the
14933  *      reader is closed or reset.
14934  *
14935  * Returns the resulting document tree
14936  */
14937 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14938 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14939               const char *URL, const char *encoding, int options)
14940 {
14941     xmlParserInputBufferPtr input;
14942     xmlParserInputPtr stream;
14943 
14944     if (fd < 0)
14945         return (NULL);
14946     if (ctxt == NULL)
14947         return (NULL);
14948 
14949     xmlCtxtReset(ctxt);
14950 
14951 
14952     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14953     if (input == NULL)
14954         return (NULL);
14955     input->closecallback = NULL;
14956     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14957     if (stream == NULL) {
14958         xmlFreeParserInputBuffer(input);
14959         return (NULL);
14960     }
14961     inputPush(ctxt, stream);
14962     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14963 }
14964 
14965 /**
14966  * xmlCtxtReadIO:
14967  * @ctxt:  an XML parser context
14968  * @ioread:  an I/O read function
14969  * @ioclose:  an I/O close function
14970  * @ioctx:  an I/O handler
14971  * @URL:  the base URL to use for the document
14972  * @encoding:  the document encoding, or NULL
14973  * @options:  a combination of xmlParserOption
14974  *
14975  * parse an XML document from I/O functions and source and build a tree.
14976  * This reuses the existing @ctxt parser context
14977  *
14978  * Returns the resulting document tree
14979  */
14980 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14981 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14982               xmlInputCloseCallback ioclose, void *ioctx,
14983 	      const char *URL,
14984               const char *encoding, int options)
14985 {
14986     xmlParserInputBufferPtr input;
14987     xmlParserInputPtr stream;
14988 
14989     if (ioread == NULL)
14990         return (NULL);
14991     if (ctxt == NULL)
14992         return (NULL);
14993 
14994     xmlCtxtReset(ctxt);
14995 
14996     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14997                                          XML_CHAR_ENCODING_NONE);
14998     if (input == NULL)
14999         return (NULL);
15000     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15001     if (stream == NULL) {
15002         xmlFreeParserInputBuffer(input);
15003         return (NULL);
15004     }
15005     inputPush(ctxt, stream);
15006     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15007 }
15008 
15009 #define bottom_parser
15010 #include "elfgcchack.h"
15011