/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32 
33 #define IN_LIBXML
34 #include "libxml.h"
35 
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41 
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82 
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85 
86 static xmlParserCtxtPtr
87 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 	                  const xmlChar *base, xmlParserCtxtPtr pctx);
89 
90 /************************************************************************
91  *									*
92  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
93  *									*
94  ************************************************************************/
95 
96 #define XML_PARSER_BIG_ENTITY 1000
97 #define XML_PARSER_LOT_ENTITY 5000
98 
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
106 
107 /*
108  * xmlParserEntityCheck
109  *
110  * Function to check non-linear entity expansion behaviour
111  * This is here to detect and stop exponential linear entity expansion
112  * This is not a limitation of the parser but a safety
113  * boundary feature. It can be disabled with the XML_PARSE_HUGE
114  * parser option.
115  */
116 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)117 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118                      xmlEntityPtr ent)
119 {
120     unsigned long consumed = 0;
121 
122     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123         return (0);
124     if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125         return (1);
126     if (size != 0) {
127         /*
128          * Do the check based on the replacement size of the entity
129          */
130         if (size < XML_PARSER_BIG_ENTITY)
131 	    return(0);
132 
133         /*
134          * A limit on the amount of text data reasonably used
135          */
136         if (ctxt->input != NULL) {
137             consumed = ctxt->input->consumed +
138                 (ctxt->input->cur - ctxt->input->base);
139         }
140         consumed += ctxt->sizeentities;
141 
142         if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 	    (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144             return (0);
145     } else if (ent != NULL) {
146         /*
147          * use the number of parsed entities in the replacement
148          */
149         size = ent->checked;
150 
151         /*
152          * The amount of data parsed counting entities size only once
153          */
154         if (ctxt->input != NULL) {
155             consumed = ctxt->input->consumed +
156                 (ctxt->input->cur - ctxt->input->base);
157         }
158         consumed += ctxt->sizeentities;
159 
160         /*
161          * Check the density of entities for the amount of data
162 	 * knowing an entity reference will take at least 3 bytes
163          */
164         if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165             return (0);
166     } else {
167         /*
168          * strange we got no data for checking just return
169          */
170         return (0);
171     }
172 
173     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174     return (1);
175 }
176 
177 /**
178  * xmlParserMaxDepth:
179  *
180  * arbitrary depth limit for the XML documents that we allow to
181  * process. This is not a limitation of the parser but a safety
182  * boundary feature. It can be disabled with the XML_PARSE_HUGE
183  * parser option.
184  */
185 unsigned int xmlParserMaxDepth = 256;
186 
187 
188 
189 #define SAX2 1
190 #define XML_PARSER_BIG_BUFFER_SIZE 300
191 #define XML_PARSER_BUFFER_SIZE 100
192 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193 
194 /*
195  * List of XML prefixed PI allowed by W3C specs
196  */
197 
198 static const char *xmlW3CPIs[] = {
199     "xml-stylesheet",
200     NULL
201 };
202 
203 
204 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206                                               const xmlChar **str);
207 
208 static xmlParserErrors
209 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 	              xmlSAXHandlerPtr sax,
211 		      void *user_data, int depth, const xmlChar *URL,
212 		      const xmlChar *ID, xmlNodePtr *list);
213 
214 static int
215 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216                           const char *encoding);
217 #ifdef LIBXML_LEGACY_ENABLED
218 static void
219 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220                       xmlNodePtr lastNode);
221 #endif /* LIBXML_LEGACY_ENABLED */
222 
223 static xmlParserErrors
224 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
226 
227 static int
228 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229 
230 /************************************************************************
231  *									*
232  * 		Some factorized error routines				*
233  *									*
234  ************************************************************************/
235 
236 /**
237  * xmlErrAttributeDup:
238  * @ctxt:  an XML parser context
239  * @prefix:  the attribute prefix
240  * @localname:  the attribute localname
241  *
242  * Handle a redefinition of attribute error
243  */
244 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)245 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246                    const xmlChar * localname)
247 {
248     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249         (ctxt->instate == XML_PARSER_EOF))
250 	return;
251     if (ctxt != NULL)
252 	ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
253 
254     if (prefix == NULL)
255         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257                         (const char *) localname, NULL, NULL, 0, 0,
258                         "Attribute %s redefined\n", localname);
259     else
260         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
261                         XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
262                         (const char *) prefix, (const char *) localname,
263                         NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264                         localname);
265     if (ctxt != NULL) {
266 	ctxt->wellFormed = 0;
267 	if (ctxt->recovery == 0)
268 	    ctxt->disableSAX = 1;
269     }
270 }
271 
272 /**
273  * xmlFatalErr:
274  * @ctxt:  an XML parser context
275  * @error:  the error number
276  * @extra:  extra information string
277  *
278  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279  */
280 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)281 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
282 {
283     const char *errmsg;
284 
285     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286         (ctxt->instate == XML_PARSER_EOF))
287 	return;
288     switch (error) {
289         case XML_ERR_INVALID_HEX_CHARREF:
290             errmsg = "CharRef: invalid hexadecimal value\n";
291             break;
292         case XML_ERR_INVALID_DEC_CHARREF:
293             errmsg = "CharRef: invalid decimal value\n";
294             break;
295         case XML_ERR_INVALID_CHARREF:
296             errmsg = "CharRef: invalid value\n";
297             break;
298         case XML_ERR_INTERNAL_ERROR:
299             errmsg = "internal error";
300             break;
301         case XML_ERR_PEREF_AT_EOF:
302             errmsg = "PEReference at end of document\n";
303             break;
304         case XML_ERR_PEREF_IN_PROLOG:
305             errmsg = "PEReference in prolog\n";
306             break;
307         case XML_ERR_PEREF_IN_EPILOG:
308             errmsg = "PEReference in epilog\n";
309             break;
310         case XML_ERR_PEREF_NO_NAME:
311             errmsg = "PEReference: no name\n";
312             break;
313         case XML_ERR_PEREF_SEMICOL_MISSING:
314             errmsg = "PEReference: expecting ';'\n";
315             break;
316         case XML_ERR_ENTITY_LOOP:
317             errmsg = "Detected an entity reference loop\n";
318             break;
319         case XML_ERR_ENTITY_NOT_STARTED:
320             errmsg = "EntityValue: \" or ' expected\n";
321             break;
322         case XML_ERR_ENTITY_PE_INTERNAL:
323             errmsg = "PEReferences forbidden in internal subset\n";
324             break;
325         case XML_ERR_ENTITY_NOT_FINISHED:
326             errmsg = "EntityValue: \" or ' expected\n";
327             break;
328         case XML_ERR_ATTRIBUTE_NOT_STARTED:
329             errmsg = "AttValue: \" or ' expected\n";
330             break;
331         case XML_ERR_LT_IN_ATTRIBUTE:
332             errmsg = "Unescaped '<' not allowed in attributes values\n";
333             break;
334         case XML_ERR_LITERAL_NOT_STARTED:
335             errmsg = "SystemLiteral \" or ' expected\n";
336             break;
337         case XML_ERR_LITERAL_NOT_FINISHED:
338             errmsg = "Unfinished System or Public ID \" or ' expected\n";
339             break;
340         case XML_ERR_MISPLACED_CDATA_END:
341             errmsg = "Sequence ']]>' not allowed in content\n";
342             break;
343         case XML_ERR_URI_REQUIRED:
344             errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345             break;
346         case XML_ERR_PUBID_REQUIRED:
347             errmsg = "PUBLIC, the Public Identifier is missing\n";
348             break;
349         case XML_ERR_HYPHEN_IN_COMMENT:
350             errmsg = "Comment must not contain '--' (double-hyphen)\n";
351             break;
352         case XML_ERR_PI_NOT_STARTED:
353             errmsg = "xmlParsePI : no target name\n";
354             break;
355         case XML_ERR_RESERVED_XML_NAME:
356             errmsg = "Invalid PI name\n";
357             break;
358         case XML_ERR_NOTATION_NOT_STARTED:
359             errmsg = "NOTATION: Name expected here\n";
360             break;
361         case XML_ERR_NOTATION_NOT_FINISHED:
362             errmsg = "'>' required to close NOTATION declaration\n";
363             break;
364         case XML_ERR_VALUE_REQUIRED:
365             errmsg = "Entity value required\n";
366             break;
367         case XML_ERR_URI_FRAGMENT:
368             errmsg = "Fragment not allowed";
369             break;
370         case XML_ERR_ATTLIST_NOT_STARTED:
371             errmsg = "'(' required to start ATTLIST enumeration\n";
372             break;
373         case XML_ERR_NMTOKEN_REQUIRED:
374             errmsg = "NmToken expected in ATTLIST enumeration\n";
375             break;
376         case XML_ERR_ATTLIST_NOT_FINISHED:
377             errmsg = "')' required to finish ATTLIST enumeration\n";
378             break;
379         case XML_ERR_MIXED_NOT_STARTED:
380             errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381             break;
382         case XML_ERR_PCDATA_REQUIRED:
383             errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384             break;
385         case XML_ERR_ELEMCONTENT_NOT_STARTED:
386             errmsg = "ContentDecl : Name or '(' expected\n";
387             break;
388         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
389             errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390             break;
391         case XML_ERR_PEREF_IN_INT_SUBSET:
392             errmsg =
393                 "PEReference: forbidden within markup decl in internal subset\n";
394             break;
395         case XML_ERR_GT_REQUIRED:
396             errmsg = "expected '>'\n";
397             break;
398         case XML_ERR_CONDSEC_INVALID:
399             errmsg = "XML conditional section '[' expected\n";
400             break;
401         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
402             errmsg = "Content error in the external subset\n";
403             break;
404         case XML_ERR_CONDSEC_INVALID_KEYWORD:
405             errmsg =
406                 "conditional section INCLUDE or IGNORE keyword expected\n";
407             break;
408         case XML_ERR_CONDSEC_NOT_FINISHED:
409             errmsg = "XML conditional section not closed\n";
410             break;
411         case XML_ERR_XMLDECL_NOT_STARTED:
412             errmsg = "Text declaration '<?xml' required\n";
413             break;
414         case XML_ERR_XMLDECL_NOT_FINISHED:
415             errmsg = "parsing XML declaration: '?>' expected\n";
416             break;
417         case XML_ERR_EXT_ENTITY_STANDALONE:
418             errmsg = "external parsed entities cannot be standalone\n";
419             break;
420         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
421             errmsg = "EntityRef: expecting ';'\n";
422             break;
423         case XML_ERR_DOCTYPE_NOT_FINISHED:
424             errmsg = "DOCTYPE improperly terminated\n";
425             break;
426         case XML_ERR_LTSLASH_REQUIRED:
427             errmsg = "EndTag: '</' not found\n";
428             break;
429         case XML_ERR_EQUAL_REQUIRED:
430             errmsg = "expected '='\n";
431             break;
432         case XML_ERR_STRING_NOT_CLOSED:
433             errmsg = "String not closed expecting \" or '\n";
434             break;
435         case XML_ERR_STRING_NOT_STARTED:
436             errmsg = "String not started expecting ' or \"\n";
437             break;
438         case XML_ERR_ENCODING_NAME:
439             errmsg = "Invalid XML encoding name\n";
440             break;
441         case XML_ERR_STANDALONE_VALUE:
442             errmsg = "standalone accepts only 'yes' or 'no'\n";
443             break;
444         case XML_ERR_DOCUMENT_EMPTY:
445             errmsg = "Document is empty\n";
446             break;
447         case XML_ERR_DOCUMENT_END:
448             errmsg = "Extra content at the end of the document\n";
449             break;
450         case XML_ERR_NOT_WELL_BALANCED:
451             errmsg = "chunk is not well balanced\n";
452             break;
453         case XML_ERR_EXTRA_CONTENT:
454             errmsg = "extra content at the end of well balanced chunk\n";
455             break;
456         case XML_ERR_VERSION_MISSING:
457             errmsg = "Malformed declaration expecting version\n";
458             break;
459 #if 0
460         case:
461             errmsg = "\n";
462             break;
463 #endif
464         default:
465             errmsg = "Unregistered error message\n";
466     }
467     if (ctxt != NULL)
468 	ctxt->errNo = error;
469     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
470                     XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471                     info);
472     if (ctxt != NULL) {
473 	ctxt->wellFormed = 0;
474 	if (ctxt->recovery == 0)
475 	    ctxt->disableSAX = 1;
476     }
477 }
478 
479 /**
480  * xmlFatalErrMsg:
481  * @ctxt:  an XML parser context
482  * @error:  the error number
483  * @msg:  the error message
484  *
485  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486  */
487 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)488 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489                const char *msg)
490 {
491     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492         (ctxt->instate == XML_PARSER_EOF))
493 	return;
494     if (ctxt != NULL)
495 	ctxt->errNo = error;
496     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
497                     XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
498     if (ctxt != NULL) {
499 	ctxt->wellFormed = 0;
500 	if (ctxt->recovery == 0)
501 	    ctxt->disableSAX = 1;
502     }
503 }
504 
505 /**
506  * xmlWarningMsg:
507  * @ctxt:  an XML parser context
508  * @error:  the error number
509  * @msg:  the error message
510  * @str1:  extra data
511  * @str2:  extra data
512  *
513  * Handle a warning.
514  */
515 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)516 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517               const char *msg, const xmlChar *str1, const xmlChar *str2)
518 {
519     xmlStructuredErrorFunc schannel = NULL;
520 
521     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522         (ctxt->instate == XML_PARSER_EOF))
523 	return;
524     if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525         (ctxt->sax->initialized == XML_SAX2_MAGIC))
526         schannel = ctxt->sax->serror;
527     if (ctxt != NULL) {
528         __xmlRaiseError(schannel,
529                     (ctxt->sax) ? ctxt->sax->warning : NULL,
530                     ctxt->userData,
531                     ctxt, NULL, XML_FROM_PARSER, error,
532                     XML_ERR_WARNING, NULL, 0,
533 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
534 		    msg, (const char *) str1, (const char *) str2);
535     } else {
536         __xmlRaiseError(schannel, NULL, NULL,
537                     ctxt, NULL, XML_FROM_PARSER, error,
538                     XML_ERR_WARNING, NULL, 0,
539 		    (const char *) str1, (const char *) str2, NULL, 0, 0,
540 		    msg, (const char *) str1, (const char *) str2);
541     }
542 }
543 
544 /**
545  * xmlValidityError:
546  * @ctxt:  an XML parser context
547  * @error:  the error number
548  * @msg:  the error message
549  * @str1:  extra data
550  *
551  * Handle a validity error.
552  */
553 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)554 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
555               const char *msg, const xmlChar *str1, const xmlChar *str2)
556 {
557     xmlStructuredErrorFunc schannel = NULL;
558 
559     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560         (ctxt->instate == XML_PARSER_EOF))
561 	return;
562     if (ctxt != NULL) {
563 	ctxt->errNo = error;
564 	if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 	    schannel = ctxt->sax->serror;
566     }
567     if (ctxt != NULL) {
568         __xmlRaiseError(schannel,
569                     ctxt->vctxt.error, ctxt->vctxt.userData,
570                     ctxt, NULL, XML_FROM_DTD, error,
571                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
572 		    (const char *) str2, NULL, 0, 0,
573 		    msg, (const char *) str1, (const char *) str2);
574 	ctxt->valid = 0;
575     } else {
576         __xmlRaiseError(schannel, NULL, NULL,
577                     ctxt, NULL, XML_FROM_DTD, error,
578                     XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 		    (const char *) str2, NULL, 0, 0,
580 		    msg, (const char *) str1, (const char *) str2);
581     }
582 }
583 
584 /**
585  * xmlFatalErrMsgInt:
586  * @ctxt:  an XML parser context
587  * @error:  the error number
588  * @msg:  the error message
589  * @val:  an integer value
590  *
591  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592  */
593 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)594 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595                   const char *msg, int val)
596 {
597     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598         (ctxt->instate == XML_PARSER_EOF))
599 	return;
600     if (ctxt != NULL)
601 	ctxt->errNo = error;
602     __xmlRaiseError(NULL, NULL, NULL,
603                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
605     if (ctxt != NULL) {
606 	ctxt->wellFormed = 0;
607 	if (ctxt->recovery == 0)
608 	    ctxt->disableSAX = 1;
609     }
610 }
611 
612 /**
613  * xmlFatalErrMsgStrIntStr:
614  * @ctxt:  an XML parser context
615  * @error:  the error number
616  * @msg:  the error message
617  * @str1:  an string info
618  * @val:  an integer value
619  * @str2:  an string info
620  *
621  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622  */
623 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)624 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625                   const char *msg, const xmlChar *str1, int val,
626 		  const xmlChar *str2)
627 {
628     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629         (ctxt->instate == XML_PARSER_EOF))
630 	return;
631     if (ctxt != NULL)
632 	ctxt->errNo = error;
633     __xmlRaiseError(NULL, NULL, NULL,
634                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635                     NULL, 0, (const char *) str1, (const char *) str2,
636 		    NULL, val, 0, msg, str1, val, str2);
637     if (ctxt != NULL) {
638 	ctxt->wellFormed = 0;
639 	if (ctxt->recovery == 0)
640 	    ctxt->disableSAX = 1;
641     }
642 }
643 
644 /**
645  * xmlFatalErrMsgStr:
646  * @ctxt:  an XML parser context
647  * @error:  the error number
648  * @msg:  the error message
649  * @val:  a string value
650  *
651  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652  */
653 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)654 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
655                   const char *msg, const xmlChar * val)
656 {
657     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658         (ctxt->instate == XML_PARSER_EOF))
659 	return;
660     if (ctxt != NULL)
661 	ctxt->errNo = error;
662     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
663                     XML_FROM_PARSER, error, XML_ERR_FATAL,
664                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665                     val);
666     if (ctxt != NULL) {
667 	ctxt->wellFormed = 0;
668 	if (ctxt->recovery == 0)
669 	    ctxt->disableSAX = 1;
670     }
671 }
672 
673 /**
674  * xmlErrMsgStr:
675  * @ctxt:  an XML parser context
676  * @error:  the error number
677  * @msg:  the error message
678  * @val:  a string value
679  *
680  * Handle a non fatal parser error
681  */
682 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)683 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684                   const char *msg, const xmlChar * val)
685 {
686     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687         (ctxt->instate == XML_PARSER_EOF))
688 	return;
689     if (ctxt != NULL)
690 	ctxt->errNo = error;
691     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
692                     XML_FROM_PARSER, error, XML_ERR_ERROR,
693                     NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694                     val);
695 }
696 
697 /**
698  * xmlNsErr:
699  * @ctxt:  an XML parser context
700  * @error:  the error number
701  * @msg:  the message
702  * @info1:  extra information string
703  * @info2:  extra information string
704  *
705  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706  */
707 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)708 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709          const char *msg,
710          const xmlChar * info1, const xmlChar * info2,
711          const xmlChar * info3)
712 {
713     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714         (ctxt->instate == XML_PARSER_EOF))
715 	return;
716     if (ctxt != NULL)
717 	ctxt->errNo = error;
718     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
719                     XML_ERR_ERROR, NULL, 0, (const char *) info1,
720                     (const char *) info2, (const char *) info3, 0, 0, msg,
721                     info1, info2, info3);
722     if (ctxt != NULL)
723 	ctxt->nsWellFormed = 0;
724 }
725 
726 /**
727  * xmlNsWarn
728  * @ctxt:  an XML parser context
729  * @error:  the error number
730  * @msg:  the message
731  * @info1:  extra information string
732  * @info2:  extra information string
733  *
734  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735  */
736 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)737 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738          const char *msg,
739          const xmlChar * info1, const xmlChar * info2,
740          const xmlChar * info3)
741 {
742     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743         (ctxt->instate == XML_PARSER_EOF))
744 	return;
745     __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746                     XML_ERR_WARNING, NULL, 0, (const char *) info1,
747                     (const char *) info2, (const char *) info3, 0, 0, msg,
748                     info1, info2, info3);
749 }
750 
751 /************************************************************************
752  *									*
753  * 		Library wide options					*
754  *									*
755  ************************************************************************/
756 
757 /**
758   * xmlHasFeature:
759   * @feature: the feature to be examined
760   *
761   * Examines if the library has been compiled with a given feature.
762   *
763   * Returns a non-zero value if the feature exist, otherwise zero.
764   * Returns zero (0) if the feature does not exist or an unknown
765   * unknown feature is requested, non-zero otherwise.
766   */
767 int
xmlHasFeature(xmlFeature feature)768 xmlHasFeature(xmlFeature feature)
769 {
770     switch (feature) {
771 	case XML_WITH_THREAD:
772 #ifdef LIBXML_THREAD_ENABLED
773 	    return(1);
774 #else
775 	    return(0);
776 #endif
777         case XML_WITH_TREE:
778 #ifdef LIBXML_TREE_ENABLED
779             return(1);
780 #else
781             return(0);
782 #endif
783         case XML_WITH_OUTPUT:
784 #ifdef LIBXML_OUTPUT_ENABLED
785             return(1);
786 #else
787             return(0);
788 #endif
789         case XML_WITH_PUSH:
790 #ifdef LIBXML_PUSH_ENABLED
791             return(1);
792 #else
793             return(0);
794 #endif
795         case XML_WITH_READER:
796 #ifdef LIBXML_READER_ENABLED
797             return(1);
798 #else
799             return(0);
800 #endif
801         case XML_WITH_PATTERN:
802 #ifdef LIBXML_PATTERN_ENABLED
803             return(1);
804 #else
805             return(0);
806 #endif
807         case XML_WITH_WRITER:
808 #ifdef LIBXML_WRITER_ENABLED
809             return(1);
810 #else
811             return(0);
812 #endif
813         case XML_WITH_SAX1:
814 #ifdef LIBXML_SAX1_ENABLED
815             return(1);
816 #else
817             return(0);
818 #endif
819         case XML_WITH_FTP:
820 #ifdef LIBXML_FTP_ENABLED
821             return(1);
822 #else
823             return(0);
824 #endif
825         case XML_WITH_HTTP:
826 #ifdef LIBXML_HTTP_ENABLED
827             return(1);
828 #else
829             return(0);
830 #endif
831         case XML_WITH_VALID:
832 #ifdef LIBXML_VALID_ENABLED
833             return(1);
834 #else
835             return(0);
836 #endif
837         case XML_WITH_HTML:
838 #ifdef LIBXML_HTML_ENABLED
839             return(1);
840 #else
841             return(0);
842 #endif
843         case XML_WITH_LEGACY:
844 #ifdef LIBXML_LEGACY_ENABLED
845             return(1);
846 #else
847             return(0);
848 #endif
849         case XML_WITH_C14N:
850 #ifdef LIBXML_C14N_ENABLED
851             return(1);
852 #else
853             return(0);
854 #endif
855         case XML_WITH_CATALOG:
856 #ifdef LIBXML_CATALOG_ENABLED
857             return(1);
858 #else
859             return(0);
860 #endif
861         case XML_WITH_XPATH:
862 #ifdef LIBXML_XPATH_ENABLED
863             return(1);
864 #else
865             return(0);
866 #endif
867         case XML_WITH_XPTR:
868 #ifdef LIBXML_XPTR_ENABLED
869             return(1);
870 #else
871             return(0);
872 #endif
873         case XML_WITH_XINCLUDE:
874 #ifdef LIBXML_XINCLUDE_ENABLED
875             return(1);
876 #else
877             return(0);
878 #endif
879         case XML_WITH_ICONV:
880 #ifdef LIBXML_ICONV_ENABLED
881             return(1);
882 #else
883             return(0);
884 #endif
885         case XML_WITH_ISO8859X:
886 #ifdef LIBXML_ISO8859X_ENABLED
887             return(1);
888 #else
889             return(0);
890 #endif
891         case XML_WITH_UNICODE:
892 #ifdef LIBXML_UNICODE_ENABLED
893             return(1);
894 #else
895             return(0);
896 #endif
897         case XML_WITH_REGEXP:
898 #ifdef LIBXML_REGEXP_ENABLED
899             return(1);
900 #else
901             return(0);
902 #endif
903         case XML_WITH_AUTOMATA:
904 #ifdef LIBXML_AUTOMATA_ENABLED
905             return(1);
906 #else
907             return(0);
908 #endif
909         case XML_WITH_EXPR:
910 #ifdef LIBXML_EXPR_ENABLED
911             return(1);
912 #else
913             return(0);
914 #endif
915         case XML_WITH_SCHEMAS:
916 #ifdef LIBXML_SCHEMAS_ENABLED
917             return(1);
918 #else
919             return(0);
920 #endif
921         case XML_WITH_SCHEMATRON:
922 #ifdef LIBXML_SCHEMATRON_ENABLED
923             return(1);
924 #else
925             return(0);
926 #endif
927         case XML_WITH_MODULES:
928 #ifdef LIBXML_MODULES_ENABLED
929             return(1);
930 #else
931             return(0);
932 #endif
933         case XML_WITH_DEBUG:
934 #ifdef LIBXML_DEBUG_ENABLED
935             return(1);
936 #else
937             return(0);
938 #endif
939         case XML_WITH_DEBUG_MEM:
940 #ifdef DEBUG_MEMORY_LOCATION
941             return(1);
942 #else
943             return(0);
944 #endif
945         case XML_WITH_DEBUG_RUN:
946 #ifdef LIBXML_DEBUG_RUNTIME
947             return(1);
948 #else
949             return(0);
950 #endif
951         case XML_WITH_ZLIB:
952 #ifdef LIBXML_ZLIB_ENABLED
953             return(1);
954 #else
955             return(0);
956 #endif
957         case XML_WITH_ICU:
958 #ifdef LIBXML_ICU_ENABLED
959             return(1);
960 #else
961             return(0);
962 #endif
963         default:
964 	    break;
965      }
966      return(0);
967 }
968 
969 /************************************************************************
970  *									*
971  * 		SAX2 defaulted attributes handling			*
972  *									*
973  ************************************************************************/
974 
975 /**
976  * xmlDetectSAX2:
977  * @ctxt:  an XML parser context
978  *
979  * Do the SAX2 detection and specific intialization
980  */
981 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)982 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983     if (ctxt == NULL) return;
984 #ifdef LIBXML_SAX1_ENABLED
985     if ((ctxt->sax) &&  (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986         ((ctxt->sax->startElementNs != NULL) ||
987          (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
988 #else
989     ctxt->sax2 = 1;
990 #endif /* LIBXML_SAX1_ENABLED */
991 
992     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
995     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996     		(ctxt->str_xml_ns == NULL)) {
997         xmlErrMemory(ctxt, NULL);
998     }
999 }
1000 
/*
 * Per-element table of defaulted attributes.
 *
 * The values array is used as a variable-length trailing array: the
 * allocation in xmlAddDefAttrs() reserves extra room past the structure,
 * and each attribute occupies 5 consecutive slots starting at index
 * 5 * n:
 *   +0 attribute local name
 *   +1 attribute prefix (or NULL)
 *   +2 default value
 *   +3 pointer to the end of the default value
 *   +4 "external" marker string, or NULL
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1008 
1009 /**
1010  * xmlAttrNormalizeSpace:
1011  * @src: the source string
1012  * @dst: the target string
1013  *
1014  * Normalize the space in non CDATA attribute values:
1015  * If the attribute type is not CDATA, then the XML processor MUST further
1016  * process the normalized attribute value by discarding any leading and
1017  * trailing space (#x20) characters, and by replacing sequences of space
1018  * (#x20) characters by a single space (#x20) character.
1019  * Note that the size of dst need to be at least src, and if one doesn't need
1020  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021  * passing src as dst is just fine.
1022  *
1023  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024  *         is needed.
1025  */
1026 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1027 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028 {
1029     if ((src == NULL) || (dst == NULL))
1030         return(NULL);
1031 
1032     while (*src == 0x20) src++;
1033     while (*src != 0) {
1034 	if (*src == 0x20) {
1035 	    while (*src == 0x20) src++;
1036 	    if (*src != 0)
1037 		*dst++ = 0x20;
1038 	} else {
1039 	    *dst++ = *src++;
1040 	}
1041     }
1042     *dst = 0;
1043     if (dst == src)
1044        return(NULL);
1045     return(dst);
1046 }
1047 
1048 /**
1049  * xmlAttrNormalizeSpace2:
1050  * @src: the source string
1051  *
1052  * Normalize the space in non CDATA attribute values, a slightly more complex
1053  * front end to avoid allocation problems when running on attribute values
1054  * coming from the input.
1055  *
1056  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057  *         is needed.
1058  */
1059 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1060 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1061 {
1062     int i;
1063     int remove_head = 0;
1064     int need_realloc = 0;
1065     const xmlChar *cur;
1066 
1067     if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068         return(NULL);
1069     i = *len;
1070     if (i <= 0)
1071         return(NULL);
1072 
1073     cur = src;
1074     while (*cur == 0x20) {
1075         cur++;
1076 	remove_head++;
1077     }
1078     while (*cur != 0) {
1079 	if (*cur == 0x20) {
1080 	    cur++;
1081 	    if ((*cur == 0x20) || (*cur == 0)) {
1082 	        need_realloc = 1;
1083 		break;
1084 	    }
1085 	} else
1086 	    cur++;
1087     }
1088     if (need_realloc) {
1089         xmlChar *ret;
1090 
1091 	ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092 	if (ret == NULL) {
1093 	    xmlErrMemory(ctxt, NULL);
1094 	    return(NULL);
1095 	}
1096 	xmlAttrNormalizeSpace(ret, ret);
1097 	*len = (int) strlen((const char *)ret);
1098         return(ret);
1099     } else if (remove_head) {
1100         *len -= remove_head;
1101         memmove(src, src + remove_head, 1 + *len);
1102 	return(src);
1103     }
1104     return(NULL);
1105 }
1106 
1107 /**
1108  * xmlAddDefAttrs:
1109  * @ctxt:  an XML parser context
1110  * @fullname:  the element fullname
1111  * @fullattr:  the attribute fullname
1112  * @value:  the attribute value
1113  *
1114  * Add a defaulted attribute for an element
1115  */
1116 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1117 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118                const xmlChar *fullname,
1119                const xmlChar *fullattr,
1120                const xmlChar *value) {
1121     xmlDefAttrsPtr defaults;
1122     int len;
1123     const xmlChar *name;
1124     const xmlChar *prefix;
1125 
1126     /*
1127      * Allows to detect attribute redefinitions
1128      */
1129     if (ctxt->attsSpecial != NULL) {
1130         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131 	    return;
1132     }
1133 
1134     if (ctxt->attsDefault == NULL) {
1135         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1136 	if (ctxt->attsDefault == NULL)
1137 	    goto mem_error;
1138     }
1139 
1140     /*
1141      * split the element name into prefix:localname , the string found
1142      * are within the DTD and then not associated to namespace names.
1143      */
1144     name = xmlSplitQName3(fullname, &len);
1145     if (name == NULL) {
1146         name = xmlDictLookup(ctxt->dict, fullname, -1);
1147 	prefix = NULL;
1148     } else {
1149         name = xmlDictLookup(ctxt->dict, name, -1);
1150 	prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151     }
1152 
1153     /*
1154      * make sure there is some storage
1155      */
1156     defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157     if (defaults == NULL) {
1158         defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1159 	                   (4 * 5) * sizeof(const xmlChar *));
1160 	if (defaults == NULL)
1161 	    goto mem_error;
1162 	defaults->nbAttrs = 0;
1163 	defaults->maxAttrs = 4;
1164 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 	                        defaults, NULL) < 0) {
1166 	    xmlFree(defaults);
1167 	    goto mem_error;
1168 	}
1169     } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1170         xmlDefAttrsPtr temp;
1171 
1172         temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1173 		       (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1174 	if (temp == NULL)
1175 	    goto mem_error;
1176 	defaults = temp;
1177 	defaults->maxAttrs *= 2;
1178 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 	                        defaults, NULL) < 0) {
1180 	    xmlFree(defaults);
1181 	    goto mem_error;
1182 	}
1183     }
1184 
1185     /*
1186      * Split the element name into prefix:localname , the string found
1187      * are within the DTD and hen not associated to namespace names.
1188      */
1189     name = xmlSplitQName3(fullattr, &len);
1190     if (name == NULL) {
1191         name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192 	prefix = NULL;
1193     } else {
1194         name = xmlDictLookup(ctxt->dict, name, -1);
1195 	prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196     }
1197 
1198     defaults->values[5 * defaults->nbAttrs] = name;
1199     defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1200     /* intern the string and precompute the end */
1201     len = xmlStrlen(value);
1202     value = xmlDictLookup(ctxt->dict, value, len);
1203     defaults->values[5 * defaults->nbAttrs + 2] = value;
1204     defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205     if (ctxt->external)
1206         defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207     else
1208         defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1209     defaults->nbAttrs++;
1210 
1211     return;
1212 
1213 mem_error:
1214     xmlErrMemory(ctxt, NULL);
1215     return;
1216 }
1217 
1218 /**
1219  * xmlAddSpecialAttr:
1220  * @ctxt:  an XML parser context
1221  * @fullname:  the element fullname
1222  * @fullattr:  the attribute fullname
1223  * @type:  the attribute type
1224  *
1225  * Register this attribute type
1226  */
1227 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1228 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 		  const xmlChar *fullname,
1230 		  const xmlChar *fullattr,
1231 		  int type)
1232 {
1233     if (ctxt->attsSpecial == NULL) {
1234         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1235 	if (ctxt->attsSpecial == NULL)
1236 	    goto mem_error;
1237     }
1238 
1239     if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240         return;
1241 
1242     xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243                      (void *) (long) type);
1244     return;
1245 
1246 mem_error:
1247     xmlErrMemory(ctxt, NULL);
1248     return;
1249 }
1250 
1251 /**
1252  * xmlCleanSpecialAttrCallback:
1253  *
1254  * Removes CDATA attributes from the special attribute table
1255  */
1256 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1257 xmlCleanSpecialAttrCallback(void *payload, void *data,
1258                             const xmlChar *fullname, const xmlChar *fullattr,
1259                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1260     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261 
1262     if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1263         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264     }
1265 }
1266 
1267 /**
1268  * xmlCleanSpecialAttr:
1269  * @ctxt:  an XML parser context
1270  *
1271  * Trim the list of attributes defined to remove all those of type
1272  * CDATA as they are not special. This call should be done when finishing
1273  * to parse the DTD and before starting to parse the document root.
1274  */
1275 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1276 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277 {
1278     if (ctxt->attsSpecial == NULL)
1279         return;
1280 
1281     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282 
1283     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284         xmlHashFree(ctxt->attsSpecial, NULL);
1285         ctxt->attsSpecial = NULL;
1286     }
1287     return;
1288 }
1289 
1290 /**
1291  * xmlCheckLanguageID:
1292  * @lang:  pointer to the string value
1293  *
1294  * Checks that the value conforms to the LanguageID production:
1295  *
1296  * NOTE: this is somewhat deprecated, those productions were removed from
1297  *       the XML Second edition.
1298  *
1299  * [33] LanguageID ::= Langcode ('-' Subcode)*
1300  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1301  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304  * [38] Subcode ::= ([a-z] | [A-Z])+
1305  *
1306  * Returns 1 if correct 0 otherwise
1307  **/
1308 int
xmlCheckLanguageID(const xmlChar * lang)1309 xmlCheckLanguageID(const xmlChar * lang)
1310 {
1311     const xmlChar *cur = lang;
1312 
1313     if (cur == NULL)
1314         return (0);
1315     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1316         ((cur[0] == 'I') && (cur[1] == '-'))) {
1317         /*
1318          * IANA code
1319          */
1320         cur += 2;
1321         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1322                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1323             cur++;
1324     } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1325                ((cur[0] == 'X') && (cur[1] == '-'))) {
1326         /*
1327          * User code
1328          */
1329         cur += 2;
1330         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1331                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1332             cur++;
1333     } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334                ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1335         /*
1336          * ISO639
1337          */
1338         cur++;
1339         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1340             ((cur[0] >= 'a') && (cur[0] <= 'z')))
1341             cur++;
1342         else
1343             return (0);
1344     } else
1345         return (0);
1346     while (cur[0] != 0) {       /* non input consuming */
1347         if (cur[0] != '-')
1348             return (0);
1349         cur++;
1350         if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1351             ((cur[0] >= 'a') && (cur[0] <= 'z')))
1352             cur++;
1353         else
1354             return (0);
1355         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||  /* non input consuming */
1356                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1357             cur++;
1358     }
1359     return (1);
1360 }
1361 
1362 /************************************************************************
1363  *									*
1364  *		Parser stacks related functions and macros		*
1365  *									*
1366  ************************************************************************/
1367 
1368 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1369                                             const xmlChar ** str);
1370 
1371 #ifdef SAX2
1372 /**
1373  * nsPush:
1374  * @ctxt:  an XML parser context
1375  * @prefix:  the namespace prefix or NULL
1376  * @URL:  the namespace name
1377  *
1378  * Pushes a new parser namespace on top of the ns stack
1379  *
1380  * Returns -1 in case of error, -2 if the namespace should be discarded
1381  *	   and the index in the stack otherwise.
1382  */
1383 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1384 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1385 {
1386     if (ctxt->options & XML_PARSE_NSCLEAN) {
1387         int i;
1388 	for (i = 0;i < ctxt->nsNr;i += 2) {
1389 	    if (ctxt->nsTab[i] == prefix) {
1390 		/* in scope */
1391 	        if (ctxt->nsTab[i + 1] == URL)
1392 		    return(-2);
1393 		/* out of scope keep it */
1394 		break;
1395 	    }
1396 	}
1397     }
1398     if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1399 	ctxt->nsMax = 10;
1400 	ctxt->nsNr = 0;
1401 	ctxt->nsTab = (const xmlChar **)
1402 	              xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1403 	if (ctxt->nsTab == NULL) {
1404 	    xmlErrMemory(ctxt, NULL);
1405 	    ctxt->nsMax = 0;
1406             return (-1);
1407 	}
1408     } else if (ctxt->nsNr >= ctxt->nsMax) {
1409         const xmlChar ** tmp;
1410         ctxt->nsMax *= 2;
1411         tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1412 				    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1413         if (tmp == NULL) {
1414             xmlErrMemory(ctxt, NULL);
1415 	    ctxt->nsMax /= 2;
1416             return (-1);
1417         }
1418 	ctxt->nsTab = tmp;
1419     }
1420     ctxt->nsTab[ctxt->nsNr++] = prefix;
1421     ctxt->nsTab[ctxt->nsNr++] = URL;
1422     return (ctxt->nsNr);
1423 }
1424 /**
1425  * nsPop:
1426  * @ctxt: an XML parser context
1427  * @nr:  the number to pop
1428  *
1429  * Pops the top @nr parser prefix/namespace from the ns stack
1430  *
1431  * Returns the number of namespaces removed
1432  */
1433 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1434 nsPop(xmlParserCtxtPtr ctxt, int nr)
1435 {
1436     int i;
1437 
1438     if (ctxt->nsTab == NULL) return(0);
1439     if (ctxt->nsNr < nr) {
1440         xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1441         nr = ctxt->nsNr;
1442     }
1443     if (ctxt->nsNr <= 0)
1444         return (0);
1445 
1446     for (i = 0;i < nr;i++) {
1447          ctxt->nsNr--;
1448 	 ctxt->nsTab[ctxt->nsNr] = NULL;
1449     }
1450     return(nr);
1451 }
1452 #endif
1453 
1454 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1455 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1456     const xmlChar **atts;
1457     int *attallocs;
1458     int maxatts;
1459 
1460     if (ctxt->atts == NULL) {
1461 	maxatts = 55; /* allow for 10 attrs by default */
1462 	atts = (const xmlChar **)
1463 	       xmlMalloc(maxatts * sizeof(xmlChar *));
1464 	if (atts == NULL) goto mem_error;
1465 	ctxt->atts = atts;
1466 	attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1467 	if (attallocs == NULL) goto mem_error;
1468 	ctxt->attallocs = attallocs;
1469 	ctxt->maxatts = maxatts;
1470     } else if (nr + 5 > ctxt->maxatts) {
1471 	maxatts = (nr + 5) * 2;
1472 	atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1473 				     maxatts * sizeof(const xmlChar *));
1474 	if (atts == NULL) goto mem_error;
1475 	ctxt->atts = atts;
1476 	attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1477 	                             (maxatts / 5) * sizeof(int));
1478 	if (attallocs == NULL) goto mem_error;
1479 	ctxt->attallocs = attallocs;
1480 	ctxt->maxatts = maxatts;
1481     }
1482     return(ctxt->maxatts);
1483 mem_error:
1484     xmlErrMemory(ctxt, NULL);
1485     return(-1);
1486 }
1487 
1488 /**
1489  * inputPush:
1490  * @ctxt:  an XML parser context
1491  * @value:  the parser input
1492  *
1493  * Pushes a new parser input on top of the input stack
1494  *
1495  * Returns -1 in case of error, the index in the stack otherwise
1496  */
1497 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1498 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1499 {
1500     if ((ctxt == NULL) || (value == NULL))
1501         return(-1);
1502     if (ctxt->inputNr >= ctxt->inputMax) {
1503         ctxt->inputMax *= 2;
1504         ctxt->inputTab =
1505             (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1506                                              ctxt->inputMax *
1507                                              sizeof(ctxt->inputTab[0]));
1508         if (ctxt->inputTab == NULL) {
1509             xmlErrMemory(ctxt, NULL);
1510 	    xmlFreeInputStream(value);
1511 	    ctxt->inputMax /= 2;
1512 	    value = NULL;
1513             return (-1);
1514         }
1515     }
1516     ctxt->inputTab[ctxt->inputNr] = value;
1517     ctxt->input = value;
1518     return (ctxt->inputNr++);
1519 }
1520 /**
1521  * inputPop:
1522  * @ctxt: an XML parser context
1523  *
1524  * Pops the top parser input from the input stack
1525  *
1526  * Returns the input just removed
1527  */
1528 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1529 inputPop(xmlParserCtxtPtr ctxt)
1530 {
1531     xmlParserInputPtr ret;
1532 
1533     if (ctxt == NULL)
1534         return(NULL);
1535     if (ctxt->inputNr <= 0)
1536         return (NULL);
1537     ctxt->inputNr--;
1538     if (ctxt->inputNr > 0)
1539         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1540     else
1541         ctxt->input = NULL;
1542     ret = ctxt->inputTab[ctxt->inputNr];
1543     ctxt->inputTab[ctxt->inputNr] = NULL;
1544     return (ret);
1545 }
1546 /**
1547  * nodePush:
1548  * @ctxt:  an XML parser context
1549  * @value:  the element node
1550  *
1551  * Pushes a new element node on top of the node stack
1552  *
1553  * Returns -1 in case of error, the index in the stack otherwise
1554  */
1555 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1556 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1557 {
1558     if (ctxt == NULL) return(0);
1559     if (ctxt->nodeNr >= ctxt->nodeMax) {
1560         xmlNodePtr *tmp;
1561 
1562 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1563                                       ctxt->nodeMax * 2 *
1564                                       sizeof(ctxt->nodeTab[0]));
1565         if (tmp == NULL) {
1566             xmlErrMemory(ctxt, NULL);
1567             return (-1);
1568         }
1569         ctxt->nodeTab = tmp;
1570 	ctxt->nodeMax *= 2;
1571     }
1572     if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1573         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1574 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1575 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1576 			  xmlParserMaxDepth);
1577 	ctxt->instate = XML_PARSER_EOF;
1578 	return(-1);
1579     }
1580     ctxt->nodeTab[ctxt->nodeNr] = value;
1581     ctxt->node = value;
1582     return (ctxt->nodeNr++);
1583 }
1584 
1585 /**
1586  * nodePop:
1587  * @ctxt: an XML parser context
1588  *
1589  * Pops the top element node from the node stack
1590  *
1591  * Returns the node just removed
1592  */
1593 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1594 nodePop(xmlParserCtxtPtr ctxt)
1595 {
1596     xmlNodePtr ret;
1597 
1598     if (ctxt == NULL) return(NULL);
1599     if (ctxt->nodeNr <= 0)
1600         return (NULL);
1601     ctxt->nodeNr--;
1602     if (ctxt->nodeNr > 0)
1603         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1604     else
1605         ctxt->node = NULL;
1606     ret = ctxt->nodeTab[ctxt->nodeNr];
1607     ctxt->nodeTab[ctxt->nodeNr] = NULL;
1608     return (ret);
1609 }
1610 
1611 #ifdef LIBXML_PUSH_ENABLED
1612 /**
1613  * nameNsPush:
1614  * @ctxt:  an XML parser context
1615  * @value:  the element name
1616  * @prefix:  the element prefix
1617  * @URI:  the element namespace name
1618  *
1619  * Pushes a new element name/prefix/URL on top of the name stack
1620  *
1621  * Returns -1 in case of error, the index in the stack otherwise
1622  */
1623 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1624 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1625            const xmlChar *prefix, const xmlChar *URI, int nsNr)
1626 {
1627     if (ctxt->nameNr >= ctxt->nameMax) {
1628         const xmlChar * *tmp;
1629         void **tmp2;
1630         ctxt->nameMax *= 2;
1631         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1632                                     ctxt->nameMax *
1633                                     sizeof(ctxt->nameTab[0]));
1634         if (tmp == NULL) {
1635 	    ctxt->nameMax /= 2;
1636 	    goto mem_error;
1637         }
1638 	ctxt->nameTab = tmp;
1639         tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1640                                     ctxt->nameMax * 3 *
1641                                     sizeof(ctxt->pushTab[0]));
1642         if (tmp2 == NULL) {
1643 	    ctxt->nameMax /= 2;
1644 	    goto mem_error;
1645         }
1646 	ctxt->pushTab = tmp2;
1647     }
1648     ctxt->nameTab[ctxt->nameNr] = value;
1649     ctxt->name = value;
1650     ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1651     ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1652     ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1653     return (ctxt->nameNr++);
1654 mem_error:
1655     xmlErrMemory(ctxt, NULL);
1656     return (-1);
1657 }
1658 /**
1659  * nameNsPop:
1660  * @ctxt: an XML parser context
1661  *
1662  * Pops the top element/prefix/URI name from the name stack
1663  *
1664  * Returns the name just removed
1665  */
1666 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1667 nameNsPop(xmlParserCtxtPtr ctxt)
1668 {
1669     const xmlChar *ret;
1670 
1671     if (ctxt->nameNr <= 0)
1672         return (NULL);
1673     ctxt->nameNr--;
1674     if (ctxt->nameNr > 0)
1675         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1676     else
1677         ctxt->name = NULL;
1678     ret = ctxt->nameTab[ctxt->nameNr];
1679     ctxt->nameTab[ctxt->nameNr] = NULL;
1680     return (ret);
1681 }
1682 #endif /* LIBXML_PUSH_ENABLED */
1683 
1684 /**
1685  * namePush:
1686  * @ctxt:  an XML parser context
1687  * @value:  the element name
1688  *
1689  * Pushes a new element name on top of the name stack
1690  *
1691  * Returns -1 in case of error, the index in the stack otherwise
1692  */
1693 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1694 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1695 {
1696     if (ctxt == NULL) return (-1);
1697 
1698     if (ctxt->nameNr >= ctxt->nameMax) {
1699         const xmlChar * *tmp;
1700         ctxt->nameMax *= 2;
1701         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1702                                     ctxt->nameMax *
1703                                     sizeof(ctxt->nameTab[0]));
1704         if (tmp == NULL) {
1705 	    ctxt->nameMax /= 2;
1706 	    goto mem_error;
1707         }
1708 	ctxt->nameTab = tmp;
1709     }
1710     ctxt->nameTab[ctxt->nameNr] = value;
1711     ctxt->name = value;
1712     return (ctxt->nameNr++);
1713 mem_error:
1714     xmlErrMemory(ctxt, NULL);
1715     return (-1);
1716 }
1717 /**
1718  * namePop:
1719  * @ctxt: an XML parser context
1720  *
1721  * Pops the top element name from the name stack
1722  *
1723  * Returns the name just removed
1724  */
1725 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1726 namePop(xmlParserCtxtPtr ctxt)
1727 {
1728     const xmlChar *ret;
1729 
1730     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1731         return (NULL);
1732     ctxt->nameNr--;
1733     if (ctxt->nameNr > 0)
1734         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1735     else
1736         ctxt->name = NULL;
1737     ret = ctxt->nameTab[ctxt->nameNr];
1738     ctxt->nameTab[ctxt->nameNr] = NULL;
1739     return (ret);
1740 }
1741 
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack, doubling the stack when it is
 * full, and makes ctxt->space point to the new top entry.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int wanted = ctxt->spaceMax * 2;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   wanted * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = wanted;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
1760 
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space then points to
 * the entry below, or to the first slot of the table when the stack is
 * empty; the vacated slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1773 
1774 /*
1775  * Macros for accessing the content. Those should be used only by the parser,
1776  * and not exported.
1777  *
1778  * Dirty macros, i.e. one often need to make assumption on the context to
1779  * use them
1780  *
1781  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1782  *           To be used with extreme caution since operations consuming
1783  *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1788  *   RAW     same as CUR but in the input buffer, bypass any token
1789  *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1792  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1793  *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
1796  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1797  *
1798  *   NEXT    Skip to the next character, this does the proper decoding
1799  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1800  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1801  *   CUR_CHAR(l) returns the current unicode character (int), set l
1802  *           to the number of xmlChars used for the encoding [0-5].
1803  *   CUR_SCHAR  same but operate on a string instead of the context
1804  *   COPY_BUF  copy the current unicode char to the target buffer, increment
1805  *            the index
1806  *   GROW, SHRINK  handling of input buffers
1807  */
1808 
/*
 * Raw accessors on the current input of the parser context; they all
 * expect a variable named ctxt to be in scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * The bytes are compared in order and the comparison stops at the first
 * mismatch. Arguments are parenthesized so that any expression can be
 * passed; s is expanded once per byte compared and should therefore be
 * free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1831 
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a parameter entity
 * reference if the new current char is '%' and pop the input if it is
 * exhausted and cannot grow.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * the line and column counters stay right across newlines. val is
 * parenthesized in the loop condition so that an expression can be
 * passed safely.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1854 
1855 #define SHRINK if ((ctxt->progressive == 0) &&				\
1856 		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857 		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1858 	xmlSHRINK (ctxt);
1859 
xmlSHRINK(xmlParserCtxtPtr ctxt)1860 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1861     xmlParserInputShrink(ctxt->input);
1862     if ((*ctxt->input->cur == 0) &&
1863         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1864 	    xmlPopInput(ctxt);
1865   }
1866 
1867 #define GROW if ((ctxt->progressive == 0) &&				\
1868 		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
1869 	xmlGROW (ctxt);
1870 
xmlGROW(xmlParserCtxtPtr ctxt)1871 static void xmlGROW (xmlParserCtxtPtr ctxt) {
1872     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1873     if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
1874         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1875 	    xmlPopInput(ctxt);
1876 }
1877 
/* SKIP_BLANKS / NEXT: thin wrappers over the corresponding functions. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the column and ctxt->nbChars
 * (no line accounting), and ask for more input when the new current byte
 * is 0. Expands to a brace block, not a do/while.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is `l` bytes long.
 * A '\n' at the current position bumps the line counter and resets the
 * column to 1, anything else adds one column. ctxt->nbChars is not touched
 * here. A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): current character of the context input,
 * respectively of the string `s`; `l` must be an lvalue and receives the
 * length reported by the underlying function.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character `v` to buffer `b` at index `i`.
 * When `l` is 1 the value is stored as a single xmlChar and `i` is
 * incremented; otherwise xmlCopyCharMultiByte() writes it and its return
 * value is added to `i`. `i` must be an lvalue without side effects.
 * The macro expands to a bare if/else statement (the caller supplies the
 * final semicolon), so keep it inside braces when it is used under another
 * `if`.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
1904 
1905 /**
1906  * xmlSkipBlankChars:
1907  * @ctxt:  the XML parser context
1908  *
1909  * skip all blanks character found at that point in the input streams.
1910  * It pops up finished entities in the process if allowable at that point.
1911  *
1912  * Returns the number of space chars skipped
1913  */
1914 
1915 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)1916 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1917     int res = 0;
1918 
1919     /*
1920      * It's Okay to use CUR/NEXT here since all the blanks are on
1921      * the ASCII range.
1922      */
1923     if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1924 	const xmlChar *cur;
1925 	/*
1926 	 * if we are in the document content, go really fast
1927 	 */
1928 	cur = ctxt->input->cur;
1929 	while (IS_BLANK_CH(*cur)) {
1930 	    if (*cur == '\n') {
1931 		ctxt->input->line++; ctxt->input->col = 1;
1932 	    }
1933 	    cur++;
1934 	    res++;
1935 	    if (*cur == 0) {
1936 		ctxt->input->cur = cur;
1937 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1938 		cur = ctxt->input->cur;
1939 	    }
1940 	}
1941 	ctxt->input->cur = cur;
1942     } else {
1943 	int cur;
1944 	do {
1945 	    cur = CUR;
1946 	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1947 		NEXT;
1948 		cur = CUR;
1949 		res++;
1950 	    }
1951 	    while ((cur == 0) && (ctxt->inputNr > 1) &&
1952 		   (ctxt->instate != XML_PARSER_COMMENT)) {
1953 		xmlPopInput(ctxt);
1954 		cur = CUR;
1955 	    }
1956 	    /*
1957 	     * Need to handle support of entities branching here
1958 	     */
1959 	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1960 	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1961     }
1962     return(res);
1963 }
1964 
1965 /************************************************************************
1966  *									*
1967  *		Commodity functions to handle entities			*
1968  *									*
1969  ************************************************************************/
1970 
1971 /**
1972  * xmlPopInput:
1973  * @ctxt:  an XML parser context
1974  *
1975  * xmlPopInput: the current input pointed by ctxt->input came to an end
1976  *          pop it and return the next char.
1977  *
1978  * Returns the current xmlChar in the parser context
1979  */
1980 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)1981 xmlPopInput(xmlParserCtxtPtr ctxt) {
1982     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1983     if (xmlParserDebugEntities)
1984 	xmlGenericError(xmlGenericErrorContext,
1985 		"Popping input %d\n", ctxt->inputNr);
1986     xmlFreeInputStream(inputPop(ctxt));
1987     if ((*ctxt->input->cur == 0) &&
1988         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1989 	    return(xmlPopInput(ctxt));
1990     return(CUR);
1991 }
1992 
1993 /**
1994  * xmlPushInput:
1995  * @ctxt:  an XML parser context
1996  * @input:  an XML parser input fragment (entity, XML fragment ...).
1997  *
1998  * xmlPushInput: switch to a new input stream which is stacked on top
1999  *               of the previous one(s).
2000  * Returns -1 in case of error or the index in the input stack
2001  */
2002 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2003 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2004     int ret;
2005     if (input == NULL) return(-1);
2006 
2007     if (xmlParserDebugEntities) {
2008 	if ((ctxt->input != NULL) && (ctxt->input->filename))
2009 	    xmlGenericError(xmlGenericErrorContext,
2010 		    "%s(%d): ", ctxt->input->filename,
2011 		    ctxt->input->line);
2012 	xmlGenericError(xmlGenericErrorContext,
2013 		"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2014     }
2015     ret = inputPush(ctxt, input);
2016     if (ctxt->instate == XML_PARSER_EOF)
2017         return(-1);
2018     GROW;
2019     return(ret);
2020 }
2021 
2022 /**
2023  * xmlParseCharRef:
2024  * @ctxt:  an XML parser context
2025  *
2026  * parse Reference declarations
2027  *
2028  * [66] CharRef ::= '&#' [0-9]+ ';' |
2029  *                  '&#x' [0-9a-fA-F]+ ';'
2030  *
2031  * [ WFC: Legal Character ]
2032  * Characters referred to using character references must match the
2033  * production for Char.
2034  *
2035  * Returns the value parsed (as an int), 0 in case of error
2036  */
2037 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2038 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2039     unsigned int val = 0;
2040     int count = 0;
2041     unsigned int outofrange = 0;
2042 
2043     /*
2044      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2045      */
2046     if ((RAW == '&') && (NXT(1) == '#') &&
2047         (NXT(2) == 'x')) {
2048 	SKIP(3);
2049 	GROW;
2050 	while (RAW != ';') { /* loop blocked by count */
2051 	    if (count++ > 20) {
2052 		count = 0;
2053 		GROW;
2054                 if (ctxt->instate == XML_PARSER_EOF)
2055                     return(0);
2056 	    }
2057 	    if ((RAW >= '0') && (RAW <= '9'))
2058 	        val = val * 16 + (CUR - '0');
2059 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2060 	        val = val * 16 + (CUR - 'a') + 10;
2061 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2062 	        val = val * 16 + (CUR - 'A') + 10;
2063 	    else {
2064 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2065 		val = 0;
2066 		break;
2067 	    }
2068 	    if (val > 0x10FFFF)
2069 	        outofrange = val;
2070 
2071 	    NEXT;
2072 	    count++;
2073 	}
2074 	if (RAW == ';') {
2075 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2076 	    ctxt->input->col++;
2077 	    ctxt->nbChars ++;
2078 	    ctxt->input->cur++;
2079 	}
2080     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2081 	SKIP(2);
2082 	GROW;
2083 	while (RAW != ';') { /* loop blocked by count */
2084 	    if (count++ > 20) {
2085 		count = 0;
2086 		GROW;
2087                 if (ctxt->instate == XML_PARSER_EOF)
2088                     return(0);
2089 	    }
2090 	    if ((RAW >= '0') && (RAW <= '9'))
2091 	        val = val * 10 + (CUR - '0');
2092 	    else {
2093 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2094 		val = 0;
2095 		break;
2096 	    }
2097 	    if (val > 0x10FFFF)
2098 	        outofrange = val;
2099 
2100 	    NEXT;
2101 	    count++;
2102 	}
2103 	if (RAW == ';') {
2104 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2105 	    ctxt->input->col++;
2106 	    ctxt->nbChars ++;
2107 	    ctxt->input->cur++;
2108 	}
2109     } else {
2110         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2111     }
2112 
2113     /*
2114      * [ WFC: Legal Character ]
2115      * Characters referred to using character references must match the
2116      * production for Char.
2117      */
2118     if ((IS_CHAR(val) && (outofrange == 0))) {
2119         return(val);
2120     } else {
2121         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2122                           "xmlParseCharRef: invalid xmlChar value %d\n",
2123 	                  val);
2124     }
2125     return(0);
2126 }
2127 
2128 /**
2129  * xmlParseStringCharRef:
2130  * @ctxt:  an XML parser context
2131  * @str:  a pointer to an index in the string
2132  *
2133  * parse Reference declarations, variant parsing from a string rather
2134  * than an an input flow.
2135  *
2136  * [66] CharRef ::= '&#' [0-9]+ ';' |
2137  *                  '&#x' [0-9a-fA-F]+ ';'
2138  *
2139  * [ WFC: Legal Character ]
2140  * Characters referred to using character references must match the
2141  * production for Char.
2142  *
2143  * Returns the value parsed (as an int), 0 in case of error, str will be
2144  *         updated to the current value of the index
2145  */
2146 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2147 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2148     const xmlChar *ptr;
2149     xmlChar cur;
2150     unsigned int val = 0;
2151     unsigned int outofrange = 0;
2152 
2153     if ((str == NULL) || (*str == NULL)) return(0);
2154     ptr = *str;
2155     cur = *ptr;
2156     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2157 	ptr += 3;
2158 	cur = *ptr;
2159 	while (cur != ';') { /* Non input consuming loop */
2160 	    if ((cur >= '0') && (cur <= '9'))
2161 	        val = val * 16 + (cur - '0');
2162 	    else if ((cur >= 'a') && (cur <= 'f'))
2163 	        val = val * 16 + (cur - 'a') + 10;
2164 	    else if ((cur >= 'A') && (cur <= 'F'))
2165 	        val = val * 16 + (cur - 'A') + 10;
2166 	    else {
2167 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2168 		val = 0;
2169 		break;
2170 	    }
2171 	    if (val > 0x10FFFF)
2172 	        outofrange = val;
2173 
2174 	    ptr++;
2175 	    cur = *ptr;
2176 	}
2177 	if (cur == ';')
2178 	    ptr++;
2179     } else if  ((cur == '&') && (ptr[1] == '#')){
2180 	ptr += 2;
2181 	cur = *ptr;
2182 	while (cur != ';') { /* Non input consuming loops */
2183 	    if ((cur >= '0') && (cur <= '9'))
2184 	        val = val * 10 + (cur - '0');
2185 	    else {
2186 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2187 		val = 0;
2188 		break;
2189 	    }
2190 	    if (val > 0x10FFFF)
2191 	        outofrange = val;
2192 
2193 	    ptr++;
2194 	    cur = *ptr;
2195 	}
2196 	if (cur == ';')
2197 	    ptr++;
2198     } else {
2199 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2200 	return(0);
2201     }
2202     *str = ptr;
2203 
2204     /*
2205      * [ WFC: Legal Character ]
2206      * Characters referred to using character references must match the
2207      * production for Char.
2208      */
2209     if ((IS_CHAR(val) && (outofrange == 0))) {
2210         return(val);
2211     } else {
2212         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2213 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2214 			  val);
2215     }
2216     return(0);
2217 }
2218 
2219 /**
2220  * xmlNewBlanksWrapperInputStream:
2221  * @ctxt:  an XML parser context
2222  * @entity:  an Entity pointer
2223  *
2224  * Create a new input stream for wrapping
2225  * blanks around a PEReference
2226  *
2227  * Returns the new input stream or NULL
2228  */
2229 
deallocblankswrapper(xmlChar * str)2230 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2231 
2232 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2233 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2234     xmlParserInputPtr input;
2235     xmlChar *buffer;
2236     size_t length;
2237     if (entity == NULL) {
2238 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2239 	            "xmlNewBlanksWrapperInputStream entity\n");
2240 	return(NULL);
2241     }
2242     if (xmlParserDebugEntities)
2243 	xmlGenericError(xmlGenericErrorContext,
2244 		"new blanks wrapper for entity: %s\n", entity->name);
2245     input = xmlNewInputStream(ctxt);
2246     if (input == NULL) {
2247 	return(NULL);
2248     }
2249     length = xmlStrlen(entity->name) + 5;
2250     buffer = xmlMallocAtomic(length);
2251     if (buffer == NULL) {
2252 	xmlErrMemory(ctxt, NULL);
2253         xmlFree(input);
2254     	return(NULL);
2255     }
2256     buffer [0] = ' ';
2257     buffer [1] = '%';
2258     buffer [length-3] = ';';
2259     buffer [length-2] = ' ';
2260     buffer [length-1] = 0;
2261     memcpy(buffer + 2, entity->name, length - 5);
2262     input->free = deallocblankswrapper;
2263     input->base = buffer;
2264     input->cur = buffer;
2265     input->length = length;
2266     input->end = &buffer[length];
2267     return(input);
2268 }
2269 
2270 /**
2271  * xmlParserHandlePEReference:
2272  * @ctxt:  the parser context
2273  *
2274  * [69] PEReference ::= '%' Name ';'
2275  *
2276  * [ WFC: No Recursion ]
2277  * A parsed entity must not contain a recursive
2278  * reference to itself, either directly or indirectly.
2279  *
2280  * [ WFC: Entity Declared ]
2281  * In a document without any DTD, a document with only an internal DTD
2282  * subset which contains no parameter entity references, or a document
2283  * with "standalone='yes'", ...  ... The declaration of a parameter
2284  * entity must precede any reference to it...
2285  *
2286  * [ VC: Entity Declared ]
2287  * In a document with an external subset or external parameter entities
2288  * with "standalone='no'", ...  ... The declaration of a parameter entity
2289  * must precede any reference to it...
2290  *
2291  * [ WFC: In DTD ]
2292  * Parameter-entity references may only appear in the DTD.
2293  * NOTE: misleading but this is handled.
2294  *
2295  * A PEReference may have been detected in the current input stream
2296  * the handling is done accordingly to
2297  *      http://www.w3.org/TR/REC-xml#entproc
2298  * i.e.
2299  *   - Included in literal in entity values
2300  *   - Included as Parameter Entity reference within DTDs
2301  */
2302 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2303 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2304     const xmlChar *name;
2305     xmlEntityPtr entity = NULL;
2306     xmlParserInputPtr input;
2307 
2308     if (RAW != '%') return;
2309     switch(ctxt->instate) {
2310 	case XML_PARSER_CDATA_SECTION:
2311 	    return;
2312         case XML_PARSER_COMMENT:
2313 	    return;
2314 	case XML_PARSER_START_TAG:
2315 	    return;
2316 	case XML_PARSER_END_TAG:
2317 	    return;
2318         case XML_PARSER_EOF:
2319 	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2320 	    return;
2321         case XML_PARSER_PROLOG:
2322 	case XML_PARSER_START:
2323 	case XML_PARSER_MISC:
2324 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2325 	    return;
2326 	case XML_PARSER_ENTITY_DECL:
2327         case XML_PARSER_CONTENT:
2328         case XML_PARSER_ATTRIBUTE_VALUE:
2329         case XML_PARSER_PI:
2330 	case XML_PARSER_SYSTEM_LITERAL:
2331 	case XML_PARSER_PUBLIC_LITERAL:
2332 	    /* we just ignore it there */
2333 	    return;
2334         case XML_PARSER_EPILOG:
2335 	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2336 	    return;
2337 	case XML_PARSER_ENTITY_VALUE:
2338 	    /*
2339 	     * NOTE: in the case of entity values, we don't do the
2340 	     *       substitution here since we need the literal
2341 	     *       entity value to be able to save the internal
2342 	     *       subset of the document.
2343 	     *       This will be handled by xmlStringDecodeEntities
2344 	     */
2345 	    return;
2346         case XML_PARSER_DTD:
2347 	    /*
2348 	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2349 	     * In the internal DTD subset, parameter-entity references
2350 	     * can occur only where markup declarations can occur, not
2351 	     * within markup declarations.
2352 	     * In that case this is handled in xmlParseMarkupDecl
2353 	     */
2354 	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2355 		return;
2356 	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2357 		return;
2358             break;
2359         case XML_PARSER_IGNORE:
2360             return;
2361     }
2362 
2363     NEXT;
2364     name = xmlParseName(ctxt);
2365     if (xmlParserDebugEntities)
2366 	xmlGenericError(xmlGenericErrorContext,
2367 		"PEReference: %s\n", name);
2368     if (name == NULL) {
2369 	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2370     } else {
2371 	if (RAW == ';') {
2372 	    NEXT;
2373 	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2374 		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2375 	    if (ctxt->instate == XML_PARSER_EOF)
2376 	        return;
2377 	    if (entity == NULL) {
2378 
2379 		/*
2380 		 * [ WFC: Entity Declared ]
2381 		 * In a document without any DTD, a document with only an
2382 		 * internal DTD subset which contains no parameter entity
2383 		 * references, or a document with "standalone='yes'", ...
2384 		 * ... The declaration of a parameter entity must precede
2385 		 * any reference to it...
2386 		 */
2387 		if ((ctxt->standalone == 1) ||
2388 		    ((ctxt->hasExternalSubset == 0) &&
2389 		     (ctxt->hasPErefs == 0))) {
2390 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2391 			 "PEReference: %%%s; not found\n", name);
2392 	        } else {
2393 		    /*
2394 		     * [ VC: Entity Declared ]
2395 		     * In a document with an external subset or external
2396 		     * parameter entities with "standalone='no'", ...
2397 		     * ... The declaration of a parameter entity must precede
2398 		     * any reference to it...
2399 		     */
2400 		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2401 		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2402 			                 "PEReference: %%%s; not found\n",
2403 				         name, NULL);
2404 		    } else
2405 		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2406 			              "PEReference: %%%s; not found\n",
2407 				      name, NULL);
2408 		    ctxt->valid = 0;
2409 		}
2410 	    } else if (ctxt->input->free != deallocblankswrapper) {
2411 		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2412 		    if (xmlPushInput(ctxt, input) < 0)
2413 		        return;
2414 	    } else {
2415 	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2416 		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2417 		    xmlChar start[4];
2418 		    xmlCharEncoding enc;
2419 
2420 		    /*
2421 		     * handle the extra spaces added before and after
2422 		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
2423 		     * this is done independently.
2424 		     */
2425 		    input = xmlNewEntityInputStream(ctxt, entity);
2426 		    if (xmlPushInput(ctxt, input) < 0)
2427 		        return;
2428 
2429 		    /*
2430 		     * Get the 4 first bytes and decode the charset
2431 		     * if enc != XML_CHAR_ENCODING_NONE
2432 		     * plug some encoding conversion routines.
2433 		     * Note that, since we may have some non-UTF8
2434 		     * encoding (like UTF16, bug 135229), the 'length'
2435 		     * is not known, but we can calculate based upon
2436 		     * the amount of data in the buffer.
2437 		     */
2438 		    GROW
2439                     if (ctxt->instate == XML_PARSER_EOF)
2440                         return;
2441 		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
2442 			start[0] = RAW;
2443 			start[1] = NXT(1);
2444 			start[2] = NXT(2);
2445 			start[3] = NXT(3);
2446 			enc = xmlDetectCharEncoding(start, 4);
2447 			if (enc != XML_CHAR_ENCODING_NONE) {
2448 			    xmlSwitchEncoding(ctxt, enc);
2449 			}
2450 		    }
2451 
2452 		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2453 			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2454 			(IS_BLANK_CH(NXT(5)))) {
2455 			xmlParseTextDecl(ctxt);
2456 		    }
2457 		} else {
2458 		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2459 			     "PEReference: %s is not a parameter entity\n",
2460 				      name);
2461 		}
2462 	    }
2463 	} else {
2464 	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2465 	}
2466     }
2467 }
2468 
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer, n) doubles the companion variable buffer_size (the
 * name is built by pasting "_size" onto the buffer identifier), adds `n`
 * to it and reallocates `buffer` accordingly. On allocation failure the
 * original buffer is left untouched and control jumps to a `mem_error`
 * label which the calling function has to provide.
 * `buffer` must be a plain identifier; `n` may be any expression.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += (n);						\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2481 
2482 /**
2483  * xmlStringLenDecodeEntities:
2484  * @ctxt:  the parser context
2485  * @str:  the input string
2486  * @len: the string length
2487  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2488  * @end:  an end marker xmlChar, 0 if none
2489  * @end2:  an end marker xmlChar, 0 if none
2490  * @end3:  an end marker xmlChar, 0 if none
2491  *
2492  * Takes a entity string content and process to do the adequate substitutions.
2493  *
2494  * [67] Reference ::= EntityRef | CharRef
2495  *
2496  * [69] PEReference ::= '%' Name ';'
2497  *
2498  * Returns A newly allocated string with the substitution done. The caller
2499  *      must deallocate it !
2500  */
2501 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2502 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2503 		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2504     xmlChar *buffer = NULL;
2505     int buffer_size = 0;
2506 
2507     xmlChar *current = NULL;
2508     xmlChar *rep = NULL;
2509     const xmlChar *last;
2510     xmlEntityPtr ent;
2511     int c,l;
2512     int nbchars = 0;
2513 
2514     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2515 	return(NULL);
2516     last = str + len;
2517 
2518     if (((ctxt->depth > 40) &&
2519          ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2520 	(ctxt->depth > 1024)) {
2521 	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2522 	return(NULL);
2523     }
2524 
2525     /*
2526      * allocate a translation buffer.
2527      */
2528     buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2529     buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2530     if (buffer == NULL) goto mem_error;
2531 
2532     /*
2533      * OK loop until we reach one of the ending char or a size limit.
2534      * we are operating on already parsed values.
2535      */
2536     if (str < last)
2537 	c = CUR_SCHAR(str, l);
2538     else
2539         c = 0;
2540     while ((c != 0) && (c != end) && /* non input consuming loop */
2541 	   (c != end2) && (c != end3)) {
2542 
2543 	if (c == 0) break;
2544         if ((c == '&') && (str[1] == '#')) {
2545 	    int val = xmlParseStringCharRef(ctxt, &str);
2546 	    if (val != 0) {
2547 		COPY_BUF(0,buffer,nbchars,val);
2548 	    }
2549 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2550 	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2551 	    }
2552 	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2553 	    if (xmlParserDebugEntities)
2554 		xmlGenericError(xmlGenericErrorContext,
2555 			"String decoding Entity Reference: %.30s\n",
2556 			str);
2557 	    ent = xmlParseStringEntityRef(ctxt, &str);
2558 	    if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2559 	        (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2560 	        goto int_error;
2561 	    if (ent != NULL)
2562 	        ctxt->nbentities += ent->checked;
2563 	    if ((ent != NULL) &&
2564 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2565 		if (ent->content != NULL) {
2566 		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
2567 		    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2568 			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2569 		    }
2570 		} else {
2571 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2572 			    "predefined entity has no content\n");
2573 		}
2574 	    } else if ((ent != NULL) && (ent->content != NULL)) {
2575 		ctxt->depth++;
2576 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2577 			                      0, 0, 0);
2578 		ctxt->depth--;
2579 
2580 		if (rep != NULL) {
2581 		    current = rep;
2582 		    while (*current != 0) { /* non input consuming loop */
2583 			buffer[nbchars++] = *current++;
2584 			if (nbchars >
2585 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2586 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2587 				goto int_error;
2588 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2589 			}
2590 		    }
2591 		    xmlFree(rep);
2592 		    rep = NULL;
2593 		}
2594 	    } else if (ent != NULL) {
2595 		int i = xmlStrlen(ent->name);
2596 		const xmlChar *cur = ent->name;
2597 
2598 		buffer[nbchars++] = '&';
2599 		if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2600 		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2601 		}
2602 		for (;i > 0;i--)
2603 		    buffer[nbchars++] = *cur++;
2604 		buffer[nbchars++] = ';';
2605 	    }
2606 	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2607 	    if (xmlParserDebugEntities)
2608 		xmlGenericError(xmlGenericErrorContext,
2609 			"String decoding PE Reference: %.30s\n", str);
2610 	    ent = xmlParseStringPEReference(ctxt, &str);
2611 	    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2612 	        goto int_error;
2613 	    if (ent != NULL)
2614 	        ctxt->nbentities += ent->checked;
2615 	    if (ent != NULL) {
2616                 if (ent->content == NULL) {
2617 		    xmlLoadEntityContent(ctxt, ent);
2618 		}
2619 		ctxt->depth++;
2620 		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2621 			                      0, 0, 0);
2622 		ctxt->depth--;
2623 		if (rep != NULL) {
2624 		    current = rep;
2625 		    while (*current != 0) { /* non input consuming loop */
2626 			buffer[nbchars++] = *current++;
2627 			if (nbchars >
2628 		            buffer_size - XML_PARSER_BUFFER_SIZE) {
2629 			    if (xmlParserEntityCheck(ctxt, nbchars, ent))
2630 			        goto int_error;
2631 			    growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2632 			}
2633 		    }
2634 		    xmlFree(rep);
2635 		    rep = NULL;
2636 		}
2637 	    }
2638 	} else {
2639 	    COPY_BUF(l,buffer,nbchars,c);
2640 	    str += l;
2641 	    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2642 	      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2643 	    }
2644 	}
2645 	if (str < last)
2646 	    c = CUR_SCHAR(str, l);
2647 	else
2648 	    c = 0;
2649     }
2650     buffer[nbchars] = 0;
2651     return(buffer);
2652 
2653 mem_error:
2654     xmlErrMemory(ctxt, NULL);
2655 int_error:
2656     if (rep != NULL)
2657         xmlFree(rep);
2658     if (buffer != NULL)
2659         xmlFree(buffer);
2660     return(NULL);
2661 }
2662 
2663 /**
2664  * xmlStringDecodeEntities:
2665  * @ctxt:  the parser context
2666  * @str:  the input string
2667  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2668  * @end:  an end marker xmlChar, 0 if none
2669  * @end2:  an end marker xmlChar, 0 if none
2670  * @end3:  an end marker xmlChar, 0 if none
2671  *
2672  * Takes a entity string content and process to do the adequate substitutions.
2673  *
2674  * [67] Reference ::= EntityRef | CharRef
2675  *
2676  * [69] PEReference ::= '%' Name ';'
2677  *
2678  * Returns A newly allocated string with the substitution done. The caller
2679  *      must deallocate it !
2680  */
2681 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2682 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2683 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2684     if ((ctxt == NULL) || (str == NULL)) return(NULL);
2685     return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2686            end, end2, end3));
2687 }
2688 
2689 /************************************************************************
2690  *									*
2691  *		Commodity functions, cleanup needed ?			*
2692  *									*
2693  ************************************************************************/
2694 
2695 /**
2696  * areBlanks:
2697  * @ctxt:  an XML parser context
2698  * @str:  a xmlChar *
2699  * @len:  the size of @str
2700  * @blank_chars: we know the chars are blanks
2701  *
2702  * Is this a sequence of blank chars that one can ignore ?
2703  *
2704  * Returns 1 if ignorable 0 otherwise.
2705  */
2706 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2707 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2708                      int blank_chars) {
2709     int i, ret;
2710     xmlNodePtr lastChild;
2711 
2712     /*
2713      * Don't spend time trying to differentiate them, the same callback is
2714      * used !
2715      */
2716     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2717 	return(0);
2718 
2719     /*
2720      * Check for xml:space value.
2721      */
2722     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2723         (*(ctxt->space) == -2))
2724 	return(0);
2725 
2726     /*
2727      * Check that the string is made of blanks
2728      */
2729     if (blank_chars == 0) {
2730 	for (i = 0;i < len;i++)
2731 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2732     }
2733 
2734     /*
2735      * Look if the element is mixed content in the DTD if available
2736      */
2737     if (ctxt->node == NULL) return(0);
2738     if (ctxt->myDoc != NULL) {
2739 	ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2740         if (ret == 0) return(1);
2741         if (ret == 1) return(0);
2742     }
2743 
2744     /*
2745      * Otherwise, heuristic :-\
2746      */
2747     if ((RAW != '<') && (RAW != 0xD)) return(0);
2748     if ((ctxt->node->children == NULL) &&
2749 	(RAW == '<') && (NXT(1) == '/')) return(0);
2750 
2751     lastChild = xmlGetLastChild(ctxt->node);
2752     if (lastChild == NULL) {
2753         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2754             (ctxt->node->content != NULL)) return(0);
2755     } else if (xmlNodeIsText(lastChild))
2756         return(0);
2757     else if ((ctxt->node->children != NULL) &&
2758              (xmlNodeIsText(ctxt->node->children)))
2759         return(0);
2760     return(1);
2761 }
2762 
2763 /************************************************************************
2764  *									*
2765  *		Extra stuff for namespace support			*
2766  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2767  *									*
2768  ************************************************************************/
2769 
2770 /**
2771  * xmlSplitQName:
2772  * @ctxt:  an XML parser context
2773  * @name:  an XML parser context
2774  * @prefix:  a xmlChar **
2775  *
2776  * parse an UTF8 encoded XML qualified name string
2777  *
2778  * [NS 5] QName ::= (Prefix ':')? LocalPart
2779  *
2780  * [NS 6] Prefix ::= NCName
2781  *
2782  * [NS 7] LocalPart ::= NCName
2783  *
2784  * Returns the local part, and prefix is updated
2785  *   to get the Prefix if any.
2786  */
2787 
2788 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2789 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2790     xmlChar buf[XML_MAX_NAMELEN + 5];
2791     xmlChar *buffer = NULL;
2792     int len = 0;
2793     int max = XML_MAX_NAMELEN;
2794     xmlChar *ret = NULL;
2795     const xmlChar *cur = name;
2796     int c;
2797 
2798     if (prefix == NULL) return(NULL);
2799     *prefix = NULL;
2800 
2801     if (cur == NULL) return(NULL);
2802 
2803 #ifndef XML_XML_NAMESPACE
2804     /* xml: prefix is not really a namespace */
2805     if ((cur[0] == 'x') && (cur[1] == 'm') &&
2806         (cur[2] == 'l') && (cur[3] == ':'))
2807 	return(xmlStrdup(name));
2808 #endif
2809 
2810     /* nasty but well=formed */
2811     if (cur[0] == ':')
2812 	return(xmlStrdup(name));
2813 
2814     c = *cur++;
2815     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2816 	buf[len++] = c;
2817 	c = *cur++;
2818     }
2819     if (len >= max) {
2820 	/*
2821 	 * Okay someone managed to make a huge name, so he's ready to pay
2822 	 * for the processing speed.
2823 	 */
2824 	max = len * 2;
2825 
2826 	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2827 	if (buffer == NULL) {
2828 	    xmlErrMemory(ctxt, NULL);
2829 	    return(NULL);
2830 	}
2831 	memcpy(buffer, buf, len);
2832 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2833 	    if (len + 10 > max) {
2834 	        xmlChar *tmp;
2835 
2836 		max *= 2;
2837 		tmp = (xmlChar *) xmlRealloc(buffer,
2838 						max * sizeof(xmlChar));
2839 		if (tmp == NULL) {
2840 		    xmlFree(buffer);
2841 		    xmlErrMemory(ctxt, NULL);
2842 		    return(NULL);
2843 		}
2844 		buffer = tmp;
2845 	    }
2846 	    buffer[len++] = c;
2847 	    c = *cur++;
2848 	}
2849 	buffer[len] = 0;
2850     }
2851 
2852     if ((c == ':') && (*cur == 0)) {
2853         if (buffer != NULL)
2854 	    xmlFree(buffer);
2855 	*prefix = NULL;
2856 	return(xmlStrdup(name));
2857     }
2858 
2859     if (buffer == NULL)
2860 	ret = xmlStrndup(buf, len);
2861     else {
2862 	ret = buffer;
2863 	buffer = NULL;
2864 	max = XML_MAX_NAMELEN;
2865     }
2866 
2867 
2868     if (c == ':') {
2869 	c = *cur;
2870         *prefix = ret;
2871 	if (c == 0) {
2872 	    return(xmlStrndup(BAD_CAST "", 0));
2873 	}
2874 	len = 0;
2875 
2876 	/*
2877 	 * Check that the first character is proper to start
2878 	 * a new name
2879 	 */
2880 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
2881 	      ((c >= 0x41) && (c <= 0x5A)) ||
2882 	      (c == '_') || (c == ':'))) {
2883 	    int l;
2884 	    int first = CUR_SCHAR(cur, l);
2885 
2886 	    if (!IS_LETTER(first) && (first != '_')) {
2887 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2888 			    "Name %s is not XML Namespace compliant\n",
2889 				  name);
2890 	    }
2891 	}
2892 	cur++;
2893 
2894 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2895 	    buf[len++] = c;
2896 	    c = *cur++;
2897 	}
2898 	if (len >= max) {
2899 	    /*
2900 	     * Okay someone managed to make a huge name, so he's ready to pay
2901 	     * for the processing speed.
2902 	     */
2903 	    max = len * 2;
2904 
2905 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2906 	    if (buffer == NULL) {
2907 	        xmlErrMemory(ctxt, NULL);
2908 		return(NULL);
2909 	    }
2910 	    memcpy(buffer, buf, len);
2911 	    while (c != 0) { /* tested bigname2.xml */
2912 		if (len + 10 > max) {
2913 		    xmlChar *tmp;
2914 
2915 		    max *= 2;
2916 		    tmp = (xmlChar *) xmlRealloc(buffer,
2917 						    max * sizeof(xmlChar));
2918 		    if (tmp == NULL) {
2919 			xmlErrMemory(ctxt, NULL);
2920 			xmlFree(buffer);
2921 			return(NULL);
2922 		    }
2923 		    buffer = tmp;
2924 		}
2925 		buffer[len++] = c;
2926 		c = *cur++;
2927 	    }
2928 	    buffer[len] = 0;
2929 	}
2930 
2931 	if (buffer == NULL)
2932 	    ret = xmlStrndup(buf, len);
2933 	else {
2934 	    ret = buffer;
2935 	}
2936     }
2937 
2938     return(ret);
2939 }
2940 
2941 /************************************************************************
2942  *									*
2943  *			The parser itself				*
2944  *	Relates to http://www.w3.org/TR/REC-xml				*
2945  *									*
2946  ************************************************************************/
2947 
2948 /************************************************************************
2949  *									*
2950  *	Routines to parse Name, NCName and NmToken			*
2951  *									*
2952  ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds; each one is bumped on
 * entry to the routine named next to it.
 */
/* fast entry points */
static unsigned long nbParseName = 0;		/* xmlParseName */
static unsigned long nbParseNCName = 0;		/* xmlParseNCName */
static unsigned long nbParseNmToken = 0;	/* xmlParseNmtoken */
static unsigned long nbParseStringName = 0;	/* xmlParseStringName */
/* fallbacks for the non trivial cases */
static unsigned long nbParseNameComplex = 0;	/* xmlParseNameComplex */
static unsigned long nbParseNCNameComplex = 0;	/* xmlParseNCNameComplex */
#endif
2961 
2962 /*
2963  * The two following functions are related to the change of accepted
2964  * characters for Name and NmToken in the Revision 5 of XML-1.0
2965  * They correspond to the modified production [4] and the new production [4a]
2966  * changes in that revision. Also note that the macros used for the
2967  * productions Letter, Digit, CombiningChar and Extender are not needed
2968  * anymore.
2969  * We still keep compatibility to pre-revision5 parsing semantic if the
2970  * new XML_PARSE_OLD10 option is given to the parser.
2971  */
2972 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)2973 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2974     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975         /*
2976 	 * Use the new checks of production [4] [4a] amd [5] of the
2977 	 * Update 5 of XML-1.0
2978 	 */
2979 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 	    (((c >= 'a') && (c <= 'z')) ||
2981 	     ((c >= 'A') && (c <= 'Z')) ||
2982 	     (c == '_') || (c == ':') ||
2983 	     ((c >= 0xC0) && (c <= 0xD6)) ||
2984 	     ((c >= 0xD8) && (c <= 0xF6)) ||
2985 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
2986 	     ((c >= 0x370) && (c <= 0x37D)) ||
2987 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
2988 	     ((c >= 0x200C) && (c <= 0x200D)) ||
2989 	     ((c >= 0x2070) && (c <= 0x218F)) ||
2990 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2991 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
2992 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
2993 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2994 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
2995 	    return(1);
2996     } else {
2997         if (IS_LETTER(c) || (c == '_') || (c == ':'))
2998 	    return(1);
2999     }
3000     return(0);
3001 }
3002 
3003 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3004 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3005     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3006         /*
3007 	 * Use the new checks of production [4] [4a] amd [5] of the
3008 	 * Update 5 of XML-1.0
3009 	 */
3010 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3011 	    (((c >= 'a') && (c <= 'z')) ||
3012 	     ((c >= 'A') && (c <= 'Z')) ||
3013 	     ((c >= '0') && (c <= '9')) || /* !start */
3014 	     (c == '_') || (c == ':') ||
3015 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3016 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3017 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3018 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3019 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3020 	     ((c >= 0x370) && (c <= 0x37D)) ||
3021 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3022 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3023 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3024 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3025 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3026 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3027 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3028 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3029 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3030 	     return(1);
3031     } else {
3032         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3033             (c == '.') || (c == '-') ||
3034 	    (c == '_') || (c == ':') ||
3035 	    (IS_COMBINING(c)) ||
3036 	    (IS_EXTENDER(c)))
3037 	    return(1);
3038     }
3039     return(0);
3040 }
3041 
3042 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3043                                           int *len, int *alloc, int normalize);
3044 
3045 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3046 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3047     int len = 0, l;
3048     int c;
3049     int count = 0;
3050 
3051 #ifdef DEBUG
3052     nbParseNameComplex++;
3053 #endif
3054 
3055     /*
3056      * Handler for more complex cases
3057      */
3058     GROW;
3059     if (ctxt->instate == XML_PARSER_EOF)
3060 	return(NULL);
3061     c = CUR_CHAR(l);
3062     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3063         /*
3064 	 * Use the new checks of production [4] [4a] amd [5] of the
3065 	 * Update 5 of XML-1.0
3066 	 */
3067 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3068 	    (!(((c >= 'a') && (c <= 'z')) ||
3069 	       ((c >= 'A') && (c <= 'Z')) ||
3070 	       (c == '_') || (c == ':') ||
3071 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3072 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3073 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3074 	       ((c >= 0x370) && (c <= 0x37D)) ||
3075 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3076 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3077 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3078 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3079 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3080 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3081 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3082 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3083 	    return(NULL);
3084 	}
3085 	len += l;
3086 	NEXTL(l);
3087 	c = CUR_CHAR(l);
3088 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3089 	       (((c >= 'a') && (c <= 'z')) ||
3090 	        ((c >= 'A') && (c <= 'Z')) ||
3091 	        ((c >= '0') && (c <= '9')) || /* !start */
3092 	        (c == '_') || (c == ':') ||
3093 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3094 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3095 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3096 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3097 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3098 	        ((c >= 0x370) && (c <= 0x37D)) ||
3099 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3101 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3102 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3103 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3104 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3105 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3106 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3107 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3108 		)) {
3109 	    if (count++ > 100) {
3110 		count = 0;
3111 		GROW;
3112                 if (ctxt->instate == XML_PARSER_EOF)
3113                     return(NULL);
3114 	    }
3115 	    len += l;
3116 	    NEXTL(l);
3117 	    c = CUR_CHAR(l);
3118 	}
3119     } else {
3120 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3121 	    (!IS_LETTER(c) && (c != '_') &&
3122 	     (c != ':'))) {
3123 	    return(NULL);
3124 	}
3125 	len += l;
3126 	NEXTL(l);
3127 	c = CUR_CHAR(l);
3128 
3129 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3130 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 		(c == '.') || (c == '-') ||
3132 		(c == '_') || (c == ':') ||
3133 		(IS_COMBINING(c)) ||
3134 		(IS_EXTENDER(c)))) {
3135 	    if (count++ > 100) {
3136 		count = 0;
3137 		GROW;
3138                 if (ctxt->instate == XML_PARSER_EOF)
3139                     return(NULL);
3140 	    }
3141 	    len += l;
3142 	    NEXTL(l);
3143 	    c = CUR_CHAR(l);
3144 	}
3145     }
3146     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3147         return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3148     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3149 }
3150 
3151 /**
3152  * xmlParseName:
3153  * @ctxt:  an XML parser context
3154  *
3155  * parse an XML name.
3156  *
3157  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3158  *                  CombiningChar | Extender
3159  *
3160  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3161  *
3162  * [6] Names ::= Name (#x20 Name)*
3163  *
3164  * Returns the Name parsed or NULL
3165  */
3166 
3167 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3168 xmlParseName(xmlParserCtxtPtr ctxt) {
3169     const xmlChar *in;
3170     const xmlChar *ret;
3171     int count = 0;
3172 
3173     GROW;
3174 
3175 #ifdef DEBUG
3176     nbParseName++;
3177 #endif
3178 
3179     /*
3180      * Accelerator for simple ASCII names
3181      */
3182     in = ctxt->input->cur;
3183     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3184 	((*in >= 0x41) && (*in <= 0x5A)) ||
3185 	(*in == '_') || (*in == ':')) {
3186 	in++;
3187 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3188 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3189 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3190 	       (*in == '_') || (*in == '-') ||
3191 	       (*in == ':') || (*in == '.'))
3192 	    in++;
3193 	if ((*in > 0) && (*in < 0x80)) {
3194 	    count = in - ctxt->input->cur;
3195 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3196 	    ctxt->input->cur = in;
3197 	    ctxt->nbChars += count;
3198 	    ctxt->input->col += count;
3199 	    if (ret == NULL)
3200 	        xmlErrMemory(ctxt, NULL);
3201 	    return(ret);
3202 	}
3203     }
3204     /* accelerator for special cases */
3205     return(xmlParseNameComplex(ctxt));
3206 }
3207 
3208 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3209 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3210     int len = 0, l;
3211     int c;
3212     int count = 0;
3213 
3214 #ifdef DEBUG
3215     nbParseNCNameComplex++;
3216 #endif
3217 
3218     /*
3219      * Handler for more complex cases
3220      */
3221     GROW;
3222     c = CUR_CHAR(l);
3223     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3225 	return(NULL);
3226     }
3227 
3228     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3229 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3230 	if (count++ > 100) {
3231 	    count = 0;
3232 	    GROW;
3233             if (ctxt->instate == XML_PARSER_EOF)
3234                 return(NULL);
3235 	}
3236 	len += l;
3237 	NEXTL(l);
3238 	c = CUR_CHAR(l);
3239     }
3240     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3241 }
3242 
3243 /**
3244  * xmlParseNCName:
3245  * @ctxt:  an XML parser context
3246  * @len:  lenght of the string parsed
3247  *
3248  * parse an XML name.
3249  *
3250  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3251  *                      CombiningChar | Extender
3252  *
3253  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3254  *
3255  * Returns the Name parsed or NULL
3256  */
3257 
3258 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3259 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3260     const xmlChar *in;
3261     const xmlChar *ret;
3262     int count = 0;
3263 
3264 #ifdef DEBUG
3265     nbParseNCName++;
3266 #endif
3267 
3268     /*
3269      * Accelerator for simple ASCII names
3270      */
3271     in = ctxt->input->cur;
3272     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3273 	((*in >= 0x41) && (*in <= 0x5A)) ||
3274 	(*in == '_')) {
3275 	in++;
3276 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3277 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3278 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3279 	       (*in == '_') || (*in == '-') ||
3280 	       (*in == '.'))
3281 	    in++;
3282 	if ((*in > 0) && (*in < 0x80)) {
3283 	    count = in - ctxt->input->cur;
3284 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3285 	    ctxt->input->cur = in;
3286 	    ctxt->nbChars += count;
3287 	    ctxt->input->col += count;
3288 	    if (ret == NULL) {
3289 	        xmlErrMemory(ctxt, NULL);
3290 	    }
3291 	    return(ret);
3292 	}
3293     }
3294     return(xmlParseNCNameComplex(ctxt));
3295 }
3296 
3297 /**
3298  * xmlParseNameAndCompare:
3299  * @ctxt:  an XML parser context
3300  *
3301  * parse an XML name and compares for match
3302  * (specialized for endtag parsing)
3303  *
3304  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3305  * and the name for mismatch
3306  */
3307 
3308 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3309 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3310     register const xmlChar *cmp = other;
3311     register const xmlChar *in;
3312     const xmlChar *ret;
3313 
3314     GROW;
3315     if (ctxt->instate == XML_PARSER_EOF)
3316         return(NULL);
3317 
3318     in = ctxt->input->cur;
3319     while (*in != 0 && *in == *cmp) {
3320 	++in;
3321 	++cmp;
3322 	ctxt->input->col++;
3323     }
3324     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3325 	/* success */
3326 	ctxt->input->cur = in;
3327 	return (const xmlChar*) 1;
3328     }
3329     /* failure (or end of input buffer), check with full function */
3330     ret = xmlParseName (ctxt);
3331     /* strings coming from the dictionnary direct compare possible */
3332     if (ret == other) {
3333 	return (const xmlChar*) 1;
3334     }
3335     return ret;
3336 }
3337 
3338 /**
3339  * xmlParseStringName:
3340  * @ctxt:  an XML parser context
3341  * @str:  a pointer to the string pointer (IN/OUT)
3342  *
3343  * parse an XML name.
3344  *
3345  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3346  *                  CombiningChar | Extender
3347  *
3348  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3349  *
3350  * [6] Names ::= Name (#x20 Name)*
3351  *
3352  * Returns the Name parsed or NULL. The @str pointer
3353  * is updated to the current location in the string.
3354  */
3355 
3356 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3357 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3358     xmlChar buf[XML_MAX_NAMELEN + 5];
3359     const xmlChar *cur = *str;
3360     int len = 0, l;
3361     int c;
3362 
3363 #ifdef DEBUG
3364     nbParseStringName++;
3365 #endif
3366 
3367     c = CUR_SCHAR(cur, l);
3368     if (!xmlIsNameStartChar(ctxt, c)) {
3369 	return(NULL);
3370     }
3371 
3372     COPY_BUF(l,buf,len,c);
3373     cur += l;
3374     c = CUR_SCHAR(cur, l);
3375     while (xmlIsNameChar(ctxt, c)) {
3376 	COPY_BUF(l,buf,len,c);
3377 	cur += l;
3378 	c = CUR_SCHAR(cur, l);
3379 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3380 	    /*
3381 	     * Okay someone managed to make a huge name, so he's ready to pay
3382 	     * for the processing speed.
3383 	     */
3384 	    xmlChar *buffer;
3385 	    int max = len * 2;
3386 
3387 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3388 	    if (buffer == NULL) {
3389 	        xmlErrMemory(ctxt, NULL);
3390 		return(NULL);
3391 	    }
3392 	    memcpy(buffer, buf, len);
3393 	    while (xmlIsNameChar(ctxt, c)) {
3394 		if (len + 10 > max) {
3395 		    xmlChar *tmp;
3396 		    max *= 2;
3397 		    tmp = (xmlChar *) xmlRealloc(buffer,
3398 			                            max * sizeof(xmlChar));
3399 		    if (tmp == NULL) {
3400 			xmlErrMemory(ctxt, NULL);
3401 			xmlFree(buffer);
3402 			return(NULL);
3403 		    }
3404 		    buffer = tmp;
3405 		}
3406 		COPY_BUF(l,buffer,len,c);
3407 		cur += l;
3408 		c = CUR_SCHAR(cur, l);
3409 	    }
3410 	    buffer[len] = 0;
3411 	    *str = cur;
3412 	    return(buffer);
3413 	}
3414     }
3415     *str = cur;
3416     return(xmlStrndup(buf, len));
3417 }
3418 
3419 /**
3420  * xmlParseNmtoken:
3421  * @ctxt:  an XML parser context
3422  *
3423  * parse an XML Nmtoken.
3424  *
3425  * [7] Nmtoken ::= (NameChar)+
3426  *
3427  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3428  *
3429  * Returns the Nmtoken parsed or NULL
3430  */
3431 
3432 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3433 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3434     xmlChar buf[XML_MAX_NAMELEN + 5];
3435     int len = 0, l;
3436     int c;
3437     int count = 0;
3438 
3439 #ifdef DEBUG
3440     nbParseNmToken++;
3441 #endif
3442 
3443     GROW;
3444     if (ctxt->instate == XML_PARSER_EOF)
3445         return(NULL);
3446     c = CUR_CHAR(l);
3447 
3448     while (xmlIsNameChar(ctxt, c)) {
3449 	if (count++ > 100) {
3450 	    count = 0;
3451 	    GROW;
3452 	}
3453 	COPY_BUF(l,buf,len,c);
3454 	NEXTL(l);
3455 	c = CUR_CHAR(l);
3456 	if (len >= XML_MAX_NAMELEN) {
3457 	    /*
3458 	     * Okay someone managed to make a huge token, so he's ready to pay
3459 	     * for the processing speed.
3460 	     */
3461 	    xmlChar *buffer;
3462 	    int max = len * 2;
3463 
3464 	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3465 	    if (buffer == NULL) {
3466 	        xmlErrMemory(ctxt, NULL);
3467 		return(NULL);
3468 	    }
3469 	    memcpy(buffer, buf, len);
3470 	    while (xmlIsNameChar(ctxt, c)) {
3471 		if (count++ > 100) {
3472 		    count = 0;
3473 		    GROW;
3474                     if (ctxt->instate == XML_PARSER_EOF) {
3475                         xmlFree(buffer);
3476                         return(NULL);
3477                     }
3478 		}
3479 		if (len + 10 > max) {
3480 		    xmlChar *tmp;
3481 
3482 		    max *= 2;
3483 		    tmp = (xmlChar *) xmlRealloc(buffer,
3484 			                            max * sizeof(xmlChar));
3485 		    if (tmp == NULL) {
3486 			xmlErrMemory(ctxt, NULL);
3487 			xmlFree(buffer);
3488 			return(NULL);
3489 		    }
3490 		    buffer = tmp;
3491 		}
3492 		COPY_BUF(l,buffer,len,c);
3493 		NEXTL(l);
3494 		c = CUR_CHAR(l);
3495 	    }
3496 	    buffer[len] = 0;
3497 	    return(buffer);
3498 	}
3499     }
3500     if (len == 0)
3501         return(NULL);
3502     return(xmlStrndup(buf, len));
3503 }
3504 
3505 /**
3506  * xmlParseEntityValue:
3507  * @ctxt:  an XML parser context
3508  * @orig:  if non-NULL store a copy of the original entity value
3509  *
3510  * parse a value for ENTITY declarations
3511  *
3512  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3513  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3514  *
3515  * Returns the EntityValue parsed with reference substituted or NULL
3516  */
3517 
3518 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3519 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3520     xmlChar *buf = NULL;
3521     int len = 0;
3522     int size = XML_PARSER_BUFFER_SIZE;
3523     int c, l;
3524     xmlChar stop;
3525     xmlChar *ret = NULL;
3526     const xmlChar *cur = NULL;
3527     xmlParserInputPtr input;
3528 
3529     if (RAW == '"') stop = '"';
3530     else if (RAW == '\'') stop = '\'';
3531     else {
3532 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3533 	return(NULL);
3534     }
3535     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3536     if (buf == NULL) {
3537 	xmlErrMemory(ctxt, NULL);
3538 	return(NULL);
3539     }
3540 
3541     /*
3542      * The content of the entity definition is copied in a buffer.
3543      */
3544 
3545     ctxt->instate = XML_PARSER_ENTITY_VALUE;
3546     input = ctxt->input;
3547     GROW;
3548     if (ctxt->instate == XML_PARSER_EOF) {
3549         xmlFree(buf);
3550         return(NULL);
3551     }
3552     NEXT;
3553     c = CUR_CHAR(l);
3554     /*
3555      * NOTE: 4.4.5 Included in Literal
3556      * When a parameter entity reference appears in a literal entity
3557      * value, ... a single or double quote character in the replacement
3558      * text is always treated as a normal data character and will not
3559      * terminate the literal.
3560      * In practice it means we stop the loop only when back at parsing
3561      * the initial entity and the quote is found
3562      */
3563     while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3564 	    (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3565 	if (len + 5 >= size) {
3566 	    xmlChar *tmp;
3567 
3568 	    size *= 2;
3569 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3570 	    if (tmp == NULL) {
3571 		xmlErrMemory(ctxt, NULL);
3572 		xmlFree(buf);
3573 		return(NULL);
3574 	    }
3575 	    buf = tmp;
3576 	}
3577 	COPY_BUF(l,buf,len,c);
3578 	NEXTL(l);
3579 	/*
3580 	 * Pop-up of finished entities.
3581 	 */
3582 	while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3583 	    xmlPopInput(ctxt);
3584 
3585 	GROW;
3586 	c = CUR_CHAR(l);
3587 	if (c == 0) {
3588 	    GROW;
3589 	    c = CUR_CHAR(l);
3590 	}
3591     }
3592     buf[len] = 0;
3593     if (ctxt->instate == XML_PARSER_EOF) {
3594         xmlFree(buf);
3595         return(NULL);
3596     }
3597 
3598     /*
3599      * Raise problem w.r.t. '&' and '%' being used in non-entities
3600      * reference constructs. Note Charref will be handled in
3601      * xmlStringDecodeEntities()
3602      */
3603     cur = buf;
3604     while (*cur != 0) { /* non input consuming */
3605 	if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3606 	    xmlChar *name;
3607 	    xmlChar tmp = *cur;
3608 
3609 	    cur++;
3610 	    name = xmlParseStringName(ctxt, &cur);
3611             if ((name == NULL) || (*cur != ';')) {
3612 		xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3613 	    "EntityValue: '%c' forbidden except for entities references\n",
3614 	                          tmp);
3615 	    }
3616 	    if ((tmp == '%') && (ctxt->inSubset == 1) &&
3617 		(ctxt->inputNr == 1)) {
3618 		xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3619 	    }
3620 	    if (name != NULL)
3621 		xmlFree(name);
3622 	    if (*cur == 0)
3623 	        break;
3624 	}
3625 	cur++;
3626     }
3627 
3628     /*
3629      * Then PEReference entities are substituted.
3630      */
3631     if (c != stop) {
3632 	xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3633 	xmlFree(buf);
3634     } else {
3635 	NEXT;
3636 	/*
3637 	 * NOTE: 4.4.7 Bypassed
3638 	 * When a general entity reference appears in the EntityValue in
3639 	 * an entity declaration, it is bypassed and left as is.
3640 	 * so XML_SUBSTITUTE_REF is not set here.
3641 	 */
3642 	ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3643 				      0, 0, 0);
3644 	if (orig != NULL)
3645 	    *orig = buf;
3646 	else
3647 	    xmlFree(buf);
3648     }
3649 
3650     return(ret);
3651 }
3652 
3653 /**
3654  * xmlParseAttValueComplex:
3655  * @ctxt:  an XML parser context
3656  * @len:   the resulting attribute len
3657  * @normalize:  wether to apply the inner normalization
3658  *
3659  * parse a value for an attribute, this is the fallback function
3660  * of xmlParseAttValue() when the attribute parsing requires handling
3661  * of non-ASCII characters, or normalization compaction.
3662  *
3663  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3664  */
3665 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3666 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3667     xmlChar limit = 0;
3668     xmlChar *buf = NULL;
3669     xmlChar *rep = NULL;
3670     int len = 0;
3671     int buf_size = 0;
3672     int c, l, in_space = 0;
3673     xmlChar *current = NULL;
3674     xmlEntityPtr ent;
3675 
3676     if (NXT(0) == '"') {
3677 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3678 	limit = '"';
3679         NEXT;
3680     } else if (NXT(0) == '\'') {
3681 	limit = '\'';
3682 	ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3683         NEXT;
3684     } else {
3685 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3686 	return(NULL);
3687     }
3688 
3689     /*
3690      * allocate a translation buffer.
3691      */
3692     buf_size = XML_PARSER_BUFFER_SIZE;
3693     buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3694     if (buf == NULL) goto mem_error;
3695 
3696     /*
3697      * OK loop until we reach one of the ending char or a size limit.
3698      */
3699     c = CUR_CHAR(l);
3700     while (((NXT(0) != limit) && /* checked */
3701             (IS_CHAR(c)) && (c != '<')) &&
3702             (ctxt->instate != XML_PARSER_EOF)) {
3703 	if (c == 0) break;
3704 	if (c == '&') {
3705 	    in_space = 0;
3706 	    if (NXT(1) == '#') {
3707 		int val = xmlParseCharRef(ctxt);
3708 
3709 		if (val == '&') {
3710 		    if (ctxt->replaceEntities) {
3711 			if (len > buf_size - 10) {
3712 			    growBuffer(buf, 10);
3713 			}
3714 			buf[len++] = '&';
3715 		    } else {
3716 			/*
3717 			 * The reparsing will be done in xmlStringGetNodeList()
3718 			 * called by the attribute() function in SAX.c
3719 			 */
3720 			if (len > buf_size - 10) {
3721 			    growBuffer(buf, 10);
3722 			}
3723 			buf[len++] = '&';
3724 			buf[len++] = '#';
3725 			buf[len++] = '3';
3726 			buf[len++] = '8';
3727 			buf[len++] = ';';
3728 		    }
3729 		} else if (val != 0) {
3730 		    if (len > buf_size - 10) {
3731 			growBuffer(buf, 10);
3732 		    }
3733 		    len += xmlCopyChar(0, &buf[len], val);
3734 		}
3735 	    } else {
3736 		ent = xmlParseEntityRef(ctxt);
3737 		ctxt->nbentities++;
3738 		if (ent != NULL)
3739 		    ctxt->nbentities += ent->owner;
3740 		if ((ent != NULL) &&
3741 		    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3742 		    if (len > buf_size - 10) {
3743 			growBuffer(buf, 10);
3744 		    }
3745 		    if ((ctxt->replaceEntities == 0) &&
3746 		        (ent->content[0] == '&')) {
3747 			buf[len++] = '&';
3748 			buf[len++] = '#';
3749 			buf[len++] = '3';
3750 			buf[len++] = '8';
3751 			buf[len++] = ';';
3752 		    } else {
3753 			buf[len++] = ent->content[0];
3754 		    }
3755 		} else if ((ent != NULL) &&
3756 		           (ctxt->replaceEntities != 0)) {
3757 		    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3758 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3759 						      XML_SUBSTITUTE_REF,
3760 						      0, 0, 0);
3761 			if (rep != NULL) {
3762 			    current = rep;
3763 			    while (*current != 0) { /* non input consuming */
3764                                 if ((*current == 0xD) || (*current == 0xA) ||
3765                                     (*current == 0x9)) {
3766                                     buf[len++] = 0x20;
3767                                     current++;
3768                                 } else
3769                                     buf[len++] = *current++;
3770 				if (len > buf_size - 10) {
3771 				    growBuffer(buf, 10);
3772 				}
3773 			    }
3774 			    xmlFree(rep);
3775 			    rep = NULL;
3776 			}
3777 		    } else {
3778 			if (len > buf_size - 10) {
3779 			    growBuffer(buf, 10);
3780 			}
3781 			if (ent->content != NULL)
3782 			    buf[len++] = ent->content[0];
3783 		    }
3784 		} else if (ent != NULL) {
3785 		    int i = xmlStrlen(ent->name);
3786 		    const xmlChar *cur = ent->name;
3787 
3788 		    /*
3789 		     * This may look absurd but is needed to detect
3790 		     * entities problems
3791 		     */
3792 		    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3793 			(ent->content != NULL)) {
3794 			rep = xmlStringDecodeEntities(ctxt, ent->content,
3795 						  XML_SUBSTITUTE_REF, 0, 0, 0);
3796 			if (rep != NULL) {
3797 			    xmlFree(rep);
3798 			    rep = NULL;
3799 			}
3800 		    }
3801 
3802 		    /*
3803 		     * Just output the reference
3804 		     */
3805 		    buf[len++] = '&';
3806 		    while (len > buf_size - i - 10) {
3807 			growBuffer(buf, i + 10);
3808 		    }
3809 		    for (;i > 0;i--)
3810 			buf[len++] = *cur++;
3811 		    buf[len++] = ';';
3812 		}
3813 	    }
3814 	} else {
3815 	    if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3816 	        if ((len != 0) || (!normalize)) {
3817 		    if ((!normalize) || (!in_space)) {
3818 			COPY_BUF(l,buf,len,0x20);
3819 			while (len > buf_size - 10) {
3820 			    growBuffer(buf, 10);
3821 			}
3822 		    }
3823 		    in_space = 1;
3824 		}
3825 	    } else {
3826 	        in_space = 0;
3827 		COPY_BUF(l,buf,len,c);
3828 		if (len > buf_size - 10) {
3829 		    growBuffer(buf, 10);
3830 		}
3831 	    }
3832 	    NEXTL(l);
3833 	}
3834 	GROW;
3835 	c = CUR_CHAR(l);
3836     }
3837     if (ctxt->instate == XML_PARSER_EOF)
3838         goto error;
3839 
3840     if ((in_space) && (normalize)) {
3841         while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3842     }
3843     buf[len] = 0;
3844     if (RAW == '<') {
3845 	xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3846     } else if (RAW != limit) {
3847 	if ((c != 0) && (!IS_CHAR(c))) {
3848 	    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3849 			   "invalid character in attribute value\n");
3850 	} else {
3851 	    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3852 			   "AttValue: ' expected\n");
3853         }
3854     } else
3855 	NEXT;
3856     if (attlen != NULL) *attlen = len;
3857     return(buf);
3858 
3859 mem_error:
3860     xmlErrMemory(ctxt, NULL);
3861 error:
3862     if (buf != NULL)
3863         xmlFree(buf);
3864     if (rep != NULL)
3865         xmlFree(rep);
3866     return(NULL);
3867 }
3868 
3869 /**
3870  * xmlParseAttValue:
3871  * @ctxt:  an XML parser context
3872  *
3873  * parse a value for an attribute
3874  * Note: the parser won't do substitution of entities here, this
3875  * will be handled later in xmlStringGetNodeList
3876  *
3877  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3878  *                   "'" ([^<&'] | Reference)* "'"
3879  *
3880  * 3.3.3 Attribute-Value Normalization:
3881  * Before the value of an attribute is passed to the application or
3882  * checked for validity, the XML processor must normalize it as follows:
3883  * - a character reference is processed by appending the referenced
3884  *   character to the attribute value
3885  * - an entity reference is processed by recursively processing the
3886  *   replacement text of the entity
3887  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3888  *   appending #x20 to the normalized value, except that only a single
3889  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3890  *   parsed entity or the literal entity value of an internal parsed entity
3891  * - other characters are processed by appending them to the normalized value
3892  * If the declared value is not CDATA, then the XML processor must further
3893  * process the normalized attribute value by discarding any leading and
3894  * trailing space (#x20) characters, and by replacing sequences of space
3895  * (#x20) characters by a single space (#x20) character.
3896  * All attributes for which no declaration has been read should be treated
3897  * by a non-validating parser as if declared CDATA.
3898  *
3899  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3900  */
3901 
3902 
3903 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3904 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3905     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3906     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3907 }
3908 
3909 /**
3910  * xmlParseSystemLiteral:
3911  * @ctxt:  an XML parser context
3912  *
3913  * parse an XML Literal
3914  *
3915  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3916  *
3917  * Returns the SystemLiteral parsed or NULL
3918  */
3919 
3920 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)3921 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3922     xmlChar *buf = NULL;
3923     int len = 0;
3924     int size = XML_PARSER_BUFFER_SIZE;
3925     int cur, l;
3926     xmlChar stop;
3927     int state = ctxt->instate;
3928     int count = 0;
3929 
3930     SHRINK;
3931     if (RAW == '"') {
3932         NEXT;
3933 	stop = '"';
3934     } else if (RAW == '\'') {
3935         NEXT;
3936 	stop = '\'';
3937     } else {
3938 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3939 	return(NULL);
3940     }
3941 
3942     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3943     if (buf == NULL) {
3944         xmlErrMemory(ctxt, NULL);
3945 	return(NULL);
3946     }
3947     ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3948     cur = CUR_CHAR(l);
3949     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3950 	if (len + 5 >= size) {
3951 	    xmlChar *tmp;
3952 
3953 	    size *= 2;
3954 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3955 	    if (tmp == NULL) {
3956 	        xmlFree(buf);
3957 		xmlErrMemory(ctxt, NULL);
3958 		ctxt->instate = (xmlParserInputState) state;
3959 		return(NULL);
3960 	    }
3961 	    buf = tmp;
3962 	}
3963 	count++;
3964 	if (count > 50) {
3965 	    GROW;
3966 	    count = 0;
3967             if (ctxt->instate == XML_PARSER_EOF) {
3968 	        xmlFree(buf);
3969 		return(NULL);
3970             }
3971 	}
3972 	COPY_BUF(l,buf,len,cur);
3973 	NEXTL(l);
3974 	cur = CUR_CHAR(l);
3975 	if (cur == 0) {
3976 	    GROW;
3977 	    SHRINK;
3978 	    cur = CUR_CHAR(l);
3979 	}
3980     }
3981     buf[len] = 0;
3982     ctxt->instate = (xmlParserInputState) state;
3983     if (!IS_CHAR(cur)) {
3984 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3985     } else {
3986 	NEXT;
3987     }
3988     return(buf);
3989 }
3990 
3991 /**
3992  * xmlParsePubidLiteral:
3993  * @ctxt:  an XML parser context
3994  *
3995  * parse an XML public literal
3996  *
3997  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3998  *
3999  * Returns the PubidLiteral parsed or NULL.
4000  */
4001 
4002 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4003 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4004     xmlChar *buf = NULL;
4005     int len = 0;
4006     int size = XML_PARSER_BUFFER_SIZE;
4007     xmlChar cur;
4008     xmlChar stop;
4009     int count = 0;
4010     xmlParserInputState oldstate = ctxt->instate;
4011 
4012     SHRINK;
4013     if (RAW == '"') {
4014         NEXT;
4015 	stop = '"';
4016     } else if (RAW == '\'') {
4017         NEXT;
4018 	stop = '\'';
4019     } else {
4020 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4021 	return(NULL);
4022     }
4023     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4024     if (buf == NULL) {
4025 	xmlErrMemory(ctxt, NULL);
4026 	return(NULL);
4027     }
4028     ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4029     cur = CUR;
4030     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4031 	if (len + 1 >= size) {
4032 	    xmlChar *tmp;
4033 
4034 	    size *= 2;
4035 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4036 	    if (tmp == NULL) {
4037 		xmlErrMemory(ctxt, NULL);
4038 		xmlFree(buf);
4039 		return(NULL);
4040 	    }
4041 	    buf = tmp;
4042 	}
4043 	buf[len++] = cur;
4044 	count++;
4045 	if (count > 50) {
4046 	    GROW;
4047 	    count = 0;
4048             if (ctxt->instate == XML_PARSER_EOF) {
4049 		xmlFree(buf);
4050 		return(NULL);
4051             }
4052 	}
4053 	NEXT;
4054 	cur = CUR;
4055 	if (cur == 0) {
4056 	    GROW;
4057 	    SHRINK;
4058 	    cur = CUR;
4059 	}
4060     }
4061     buf[len] = 0;
4062     if (cur != stop) {
4063 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4064     } else {
4065 	NEXT;
4066     }
4067     ctxt->instate = oldstate;
4068     return(buf);
4069 }
4070 
4071 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4072 
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry is non-zero (equal to its own index) when
 * the byte can be skipped by the fast scanning loop; it is zero for every
 * byte that needs special handling: control characters other than TAB
 * (including CR and LF), '&', '<', ']' and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4110 
4111 /**
4112  * xmlParseCharData:
4113  * @ctxt:  an XML parser context
4114  * @cdata:  int indicating whether we are within a CDATA section
4115  *
4116  * parse a CharData section.
4117  * if we are within a CDATA section ']]>' marks an end of section.
4118  *
4119  * The right angle bracket (>) may be represented using the string "&gt;",
4120  * and must, for compatibility, be escaped using "&gt;" or a character
4121  * reference when it appears in the string "]]>" in content, when that
4122  * string is not marking the end of a CDATA section.
4123  *
4124  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4125  */
4126 
4127 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4128 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4129     const xmlChar *in;
4130     int nbchar = 0;
4131     int line = ctxt->input->line;
4132     int col = ctxt->input->col;
4133     int ccol;
4134 
4135     SHRINK;
4136     GROW;
4137     /*
4138      * Accelerated common case where input don't need to be
4139      * modified before passing it to the handler.
4140      */
4141     if (!cdata) {
4142 	in = ctxt->input->cur;
4143 	do {
4144 get_more_space:
4145 	    while (*in == 0x20) { in++; ctxt->input->col++; }
4146 	    if (*in == 0xA) {
4147 		do {
4148 		    ctxt->input->line++; ctxt->input->col = 1;
4149 		    in++;
4150 		} while (*in == 0xA);
4151 		goto get_more_space;
4152 	    }
4153 	    if (*in == '<') {
4154 		nbchar = in - ctxt->input->cur;
4155 		if (nbchar > 0) {
4156 		    const xmlChar *tmp = ctxt->input->cur;
4157 		    ctxt->input->cur = in;
4158 
4159 		    if ((ctxt->sax != NULL) &&
4160 		        (ctxt->sax->ignorableWhitespace !=
4161 		         ctxt->sax->characters)) {
4162 			if (areBlanks(ctxt, tmp, nbchar, 1)) {
4163 			    if (ctxt->sax->ignorableWhitespace != NULL)
4164 				ctxt->sax->ignorableWhitespace(ctxt->userData,
4165 						       tmp, nbchar);
4166 			} else {
4167 			    if (ctxt->sax->characters != NULL)
4168 				ctxt->sax->characters(ctxt->userData,
4169 						      tmp, nbchar);
4170 			    if (*ctxt->space == -1)
4171 			        *ctxt->space = -2;
4172 			}
4173 		    } else if ((ctxt->sax != NULL) &&
4174 		               (ctxt->sax->characters != NULL)) {
4175 			ctxt->sax->characters(ctxt->userData,
4176 					      tmp, nbchar);
4177 		    }
4178 		}
4179 		return;
4180 	    }
4181 
4182 get_more:
4183             ccol = ctxt->input->col;
4184 	    while (test_char_data[*in]) {
4185 		in++;
4186 		ccol++;
4187 	    }
4188 	    ctxt->input->col = ccol;
4189 	    if (*in == 0xA) {
4190 		do {
4191 		    ctxt->input->line++; ctxt->input->col = 1;
4192 		    in++;
4193 		} while (*in == 0xA);
4194 		goto get_more;
4195 	    }
4196 	    if (*in == ']') {
4197 		if ((in[1] == ']') && (in[2] == '>')) {
4198 		    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4199 		    ctxt->input->cur = in;
4200 		    return;
4201 		}
4202 		in++;
4203 		ctxt->input->col++;
4204 		goto get_more;
4205 	    }
4206 	    nbchar = in - ctxt->input->cur;
4207 	    if (nbchar > 0) {
4208 		if ((ctxt->sax != NULL) &&
4209 		    (ctxt->sax->ignorableWhitespace !=
4210 		     ctxt->sax->characters) &&
4211 		    (IS_BLANK_CH(*ctxt->input->cur))) {
4212 		    const xmlChar *tmp = ctxt->input->cur;
4213 		    ctxt->input->cur = in;
4214 
4215 		    if (areBlanks(ctxt, tmp, nbchar, 0)) {
4216 		        if (ctxt->sax->ignorableWhitespace != NULL)
4217 			    ctxt->sax->ignorableWhitespace(ctxt->userData,
4218 							   tmp, nbchar);
4219 		    } else {
4220 		        if (ctxt->sax->characters != NULL)
4221 			    ctxt->sax->characters(ctxt->userData,
4222 						  tmp, nbchar);
4223 			if (*ctxt->space == -1)
4224 			    *ctxt->space = -2;
4225 		    }
4226                     line = ctxt->input->line;
4227                     col = ctxt->input->col;
4228 		} else if (ctxt->sax != NULL) {
4229 		    if (ctxt->sax->characters != NULL)
4230 			ctxt->sax->characters(ctxt->userData,
4231 					      ctxt->input->cur, nbchar);
4232                     line = ctxt->input->line;
4233                     col = ctxt->input->col;
4234 		}
4235                 /* something really bad happened in the SAX callback */
4236                 if (ctxt->instate != XML_PARSER_CONTENT)
4237                     return;
4238 	    }
4239 	    ctxt->input->cur = in;
4240 	    if (*in == 0xD) {
4241 		in++;
4242 		if (*in == 0xA) {
4243 		    ctxt->input->cur = in;
4244 		    in++;
4245 		    ctxt->input->line++; ctxt->input->col = 1;
4246 		    continue; /* while */
4247 		}
4248 		in--;
4249 	    }
4250 	    if (*in == '<') {
4251 		return;
4252 	    }
4253 	    if (*in == '&') {
4254 		return;
4255 	    }
4256 	    SHRINK;
4257 	    GROW;
4258             if (ctxt->instate == XML_PARSER_EOF)
4259 		return;
4260 	    in = ctxt->input->cur;
4261 	} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4262 	nbchar = 0;
4263     }
4264     ctxt->input->line = line;
4265     ctxt->input->col = col;
4266     xmlParseCharDataComplex(ctxt, cdata);
4267 }
4268 
4269 /**
4270  * xmlParseCharDataComplex:
4271  * @ctxt:  an XML parser context
4272  * @cdata:  int indicating whether we are within a CDATA section
4273  *
4274  * parse a CharData section.this is the fallback function
4275  * of xmlParseCharData() when the parsing requires handling
4276  * of non-ASCII characters.
4277  */
4278 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4279 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4280     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4281     int nbchar = 0;
4282     int cur, l;
4283     int count = 0;
4284 
4285     SHRINK;
4286     GROW;
4287     cur = CUR_CHAR(l);
4288     while ((cur != '<') && /* checked */
4289            (cur != '&') &&
4290 	   (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4291 	if ((cur == ']') && (NXT(1) == ']') &&
4292 	    (NXT(2) == '>')) {
4293 	    if (cdata) break;
4294 	    else {
4295 		xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4296 	    }
4297 	}
4298 	COPY_BUF(l,buf,nbchar,cur);
4299 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4300 	    buf[nbchar] = 0;
4301 
4302 	    /*
4303 	     * OK the segment is to be consumed as chars.
4304 	     */
4305 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4306 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4307 		    if (ctxt->sax->ignorableWhitespace != NULL)
4308 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4309 			                               buf, nbchar);
4310 		} else {
4311 		    if (ctxt->sax->characters != NULL)
4312 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4313 		    if ((ctxt->sax->characters !=
4314 		         ctxt->sax->ignorableWhitespace) &&
4315 			(*ctxt->space == -1))
4316 			*ctxt->space = -2;
4317 		}
4318 	    }
4319 	    nbchar = 0;
4320             /* something really bad happened in the SAX callback */
4321             if (ctxt->instate != XML_PARSER_CONTENT)
4322                 return;
4323 	}
4324 	count++;
4325 	if (count > 50) {
4326 	    GROW;
4327 	    count = 0;
4328             if (ctxt->instate == XML_PARSER_EOF)
4329 		return;
4330 	}
4331 	NEXTL(l);
4332 	cur = CUR_CHAR(l);
4333     }
4334     if (nbchar != 0) {
4335         buf[nbchar] = 0;
4336 	/*
4337 	 * OK the segment is to be consumed as chars.
4338 	 */
4339 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4340 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4341 		if (ctxt->sax->ignorableWhitespace != NULL)
4342 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4343 	    } else {
4344 		if (ctxt->sax->characters != NULL)
4345 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4346 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4347 		    (*ctxt->space == -1))
4348 		    *ctxt->space = -2;
4349 	    }
4350 	}
4351     }
4352     if ((cur != 0) && (!IS_CHAR(cur))) {
4353 	/* Generate the error and skip the offending character */
4354         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4355                           "PCDATA invalid Char value %d\n",
4356 	                  cur);
4357 	NEXTL(l);
4358     }
4359 }
4360 
4361 /**
4362  * xmlParseExternalID:
4363  * @ctxt:  an XML parser context
4364  * @publicID:  a xmlChar** receiving PubidLiteral
4365  * @strict: indicate whether we should restrict parsing to only
4366  *          production [75], see NOTE below
4367  *
4368  * Parse an External ID or a Public ID
4369  *
4370  * NOTE: Productions [75] and [83] interact badly since [75] can generate
4371  *       'PUBLIC' S PubidLiteral S SystemLiteral
4372  *
4373  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4374  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4375  *
4376  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4377  *
4378  * Returns the function returns SystemLiteral and in the second
4379  *                case publicID receives PubidLiteral, is strict is off
4380  *                it is possible to return NULL and have publicID set.
4381  */
4382 
4383 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4384 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4385     xmlChar *URI = NULL;
4386 
4387     SHRINK;
4388 
4389     *publicID = NULL;
4390     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4391         SKIP(6);
4392 	if (!IS_BLANK_CH(CUR)) {
4393 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4394 	                   "Space required after 'SYSTEM'\n");
4395 	}
4396         SKIP_BLANKS;
4397 	URI = xmlParseSystemLiteral(ctxt);
4398 	if (URI == NULL) {
4399 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4400         }
4401     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4402         SKIP(6);
4403 	if (!IS_BLANK_CH(CUR)) {
4404 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4405 		    "Space required after 'PUBLIC'\n");
4406 	}
4407         SKIP_BLANKS;
4408 	*publicID = xmlParsePubidLiteral(ctxt);
4409 	if (*publicID == NULL) {
4410 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4411 	}
4412 	if (strict) {
4413 	    /*
4414 	     * We don't handle [83] so "S SystemLiteral" is required.
4415 	     */
4416 	    if (!IS_BLANK_CH(CUR)) {
4417 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4418 			"Space required after the Public Identifier\n");
4419 	    }
4420 	} else {
4421 	    /*
4422 	     * We handle [83] so we return immediately, if
4423 	     * "S SystemLiteral" is not detected. From a purely parsing
4424 	     * point of view that's a nice mess.
4425 	     */
4426 	    const xmlChar *ptr;
4427 	    GROW;
4428 
4429 	    ptr = CUR_PTR;
4430 	    if (!IS_BLANK_CH(*ptr)) return(NULL);
4431 
4432 	    while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4433 	    if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4434 	}
4435         SKIP_BLANKS;
4436 	URI = xmlParseSystemLiteral(ctxt);
4437 	if (URI == NULL) {
4438 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4439         }
4440     }
4441     return(URI);
4442 }
4443 
4444 /**
4445  * xmlParseCommentComplex:
4446  * @ctxt:  an XML parser context
4447  * @buf:  the already parsed part of the buffer
4448  * @len:  number of bytes filles in the buffer
4449  * @size:  allocated size of the buffer
4450  *
4451  * Skip an XML (SGML) comment <!-- .... -->
4452  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4453  *  must not occur within comments. "
4454  * This is the slow routine in case the accelerator for ascii didn't work
4455  *
4456  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4457  */
4458 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4459 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4460     int q, ql;
4461     int r, rl;
4462     int cur, l;
4463     int count = 0;
4464     int inputid;
4465 
4466     inputid = ctxt->input->id;
4467 
4468     if (buf == NULL) {
4469         len = 0;
4470 	size = XML_PARSER_BUFFER_SIZE;
4471 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4472 	if (buf == NULL) {
4473 	    xmlErrMemory(ctxt, NULL);
4474 	    return;
4475 	}
4476     }
4477     GROW;	/* Assure there's enough input data */
4478     q = CUR_CHAR(ql);
4479     if (q == 0)
4480         goto not_terminated;
4481     if (!IS_CHAR(q)) {
4482         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4483                           "xmlParseComment: invalid xmlChar value %d\n",
4484 	                  q);
4485 	xmlFree (buf);
4486 	return;
4487     }
4488     NEXTL(ql);
4489     r = CUR_CHAR(rl);
4490     if (r == 0)
4491         goto not_terminated;
4492     if (!IS_CHAR(r)) {
4493         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4494                           "xmlParseComment: invalid xmlChar value %d\n",
4495 	                  q);
4496 	xmlFree (buf);
4497 	return;
4498     }
4499     NEXTL(rl);
4500     cur = CUR_CHAR(l);
4501     if (cur == 0)
4502         goto not_terminated;
4503     while (IS_CHAR(cur) && /* checked */
4504            ((cur != '>') ||
4505 	    (r != '-') || (q != '-'))) {
4506 	if ((r == '-') && (q == '-')) {
4507 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4508 	}
4509 	if (len + 5 >= size) {
4510 	    xmlChar *new_buf;
4511 	    size *= 2;
4512 	    new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4513 	    if (new_buf == NULL) {
4514 		xmlFree (buf);
4515 		xmlErrMemory(ctxt, NULL);
4516 		return;
4517 	    }
4518 	    buf = new_buf;
4519 	}
4520 	COPY_BUF(ql,buf,len,q);
4521 	q = r;
4522 	ql = rl;
4523 	r = cur;
4524 	rl = l;
4525 
4526 	count++;
4527 	if (count > 50) {
4528 	    GROW;
4529 	    count = 0;
4530             if (ctxt->instate == XML_PARSER_EOF) {
4531 		xmlFree(buf);
4532 		return;
4533             }
4534 	}
4535 	NEXTL(l);
4536 	cur = CUR_CHAR(l);
4537 	if (cur == 0) {
4538 	    SHRINK;
4539 	    GROW;
4540 	    cur = CUR_CHAR(l);
4541 	}
4542     }
4543     buf[len] = 0;
4544     if (cur == 0) {
4545 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4546 	                     "Comment not terminated \n<!--%.50s\n", buf);
4547     } else if (!IS_CHAR(cur)) {
4548         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4549                           "xmlParseComment: invalid xmlChar value %d\n",
4550 	                  cur);
4551     } else {
4552 	if (inputid != ctxt->input->id) {
4553 	    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4554 		"Comment doesn't start and stop in the same entity\n");
4555 	}
4556         NEXT;
4557 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4558 	    (!ctxt->disableSAX))
4559 	    ctxt->sax->comment(ctxt->userData, buf);
4560     }
4561     xmlFree(buf);
4562     return;
4563 not_terminated:
4564     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4565 			 "Comment not terminated\n", NULL);
4566     xmlFree(buf);
4567     return;
4568 }
4569 
4570 /**
4571  * xmlParseComment:
4572  * @ctxt:  an XML parser context
4573  *
4574  * Skip an XML (SGML) comment <!-- .... -->
4575  *  The spec says that "For compatibility, the string "--" (double-hyphen)
4576  *  must not occur within comments. "
4577  *
4578  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4579  */
4580 void
xmlParseComment(xmlParserCtxtPtr ctxt)4581 xmlParseComment(xmlParserCtxtPtr ctxt) {
4582     xmlChar *buf = NULL;
4583     int size = XML_PARSER_BUFFER_SIZE;
4584     int len = 0;
4585     xmlParserInputState state;
4586     const xmlChar *in;
4587     int nbchar = 0, ccol;
4588     int inputid;
4589 
4590     /*
4591      * Check that there is a comment right here.
4592      */
4593     if ((RAW != '<') || (NXT(1) != '!') ||
4594         (NXT(2) != '-') || (NXT(3) != '-')) return;
4595     state = ctxt->instate;
4596     ctxt->instate = XML_PARSER_COMMENT;
4597     inputid = ctxt->input->id;
4598     SKIP(4);
4599     SHRINK;
4600     GROW;
4601 
4602     /*
4603      * Accelerated common case where input don't need to be
4604      * modified before passing it to the handler.
4605      */
4606     in = ctxt->input->cur;
4607     do {
4608 	if (*in == 0xA) {
4609 	    do {
4610 		ctxt->input->line++; ctxt->input->col = 1;
4611 		in++;
4612 	    } while (*in == 0xA);
4613 	}
4614 get_more:
4615         ccol = ctxt->input->col;
4616 	while (((*in > '-') && (*in <= 0x7F)) ||
4617 	       ((*in >= 0x20) && (*in < '-')) ||
4618 	       (*in == 0x09)) {
4619 		    in++;
4620 		    ccol++;
4621 	}
4622 	ctxt->input->col = ccol;
4623 	if (*in == 0xA) {
4624 	    do {
4625 		ctxt->input->line++; ctxt->input->col = 1;
4626 		in++;
4627 	    } while (*in == 0xA);
4628 	    goto get_more;
4629 	}
4630 	nbchar = in - ctxt->input->cur;
4631 	/*
4632 	 * save current set of data
4633 	 */
4634 	if (nbchar > 0) {
4635 	    if ((ctxt->sax != NULL) &&
4636 		(ctxt->sax->comment != NULL)) {
4637 		if (buf == NULL) {
4638 		    if ((*in == '-') && (in[1] == '-'))
4639 		        size = nbchar + 1;
4640 		    else
4641 		        size = XML_PARSER_BUFFER_SIZE + nbchar;
4642 		    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4643 		    if (buf == NULL) {
4644 		        xmlErrMemory(ctxt, NULL);
4645 			ctxt->instate = state;
4646 			return;
4647 		    }
4648 		    len = 0;
4649 		} else if (len + nbchar + 1 >= size) {
4650 		    xmlChar *new_buf;
4651 		    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4652 		    new_buf = (xmlChar *) xmlRealloc(buf,
4653 		                                     size * sizeof(xmlChar));
4654 		    if (new_buf == NULL) {
4655 		        xmlFree (buf);
4656 			xmlErrMemory(ctxt, NULL);
4657 			ctxt->instate = state;
4658 			return;
4659 		    }
4660 		    buf = new_buf;
4661 		}
4662 		memcpy(&buf[len], ctxt->input->cur, nbchar);
4663 		len += nbchar;
4664 		buf[len] = 0;
4665 	    }
4666 	}
4667 	ctxt->input->cur = in;
4668 	if (*in == 0xA) {
4669 	    in++;
4670 	    ctxt->input->line++; ctxt->input->col = 1;
4671 	}
4672 	if (*in == 0xD) {
4673 	    in++;
4674 	    if (*in == 0xA) {
4675 		ctxt->input->cur = in;
4676 		in++;
4677 		ctxt->input->line++; ctxt->input->col = 1;
4678 		continue; /* while */
4679 	    }
4680 	    in--;
4681 	}
4682 	SHRINK;
4683 	GROW;
4684         if (ctxt->instate == XML_PARSER_EOF) {
4685             xmlFree(buf);
4686             return;
4687 	}
4688 	in = ctxt->input->cur;
4689 	if (*in == '-') {
4690 	    if (in[1] == '-') {
4691 	        if (in[2] == '>') {
4692 		    if (ctxt->input->id != inputid) {
4693 			xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4694 			"comment doesn't start and stop in the same entity\n");
4695 		    }
4696 		    SKIP(3);
4697 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4698 		        (!ctxt->disableSAX)) {
4699 			if (buf != NULL)
4700 			    ctxt->sax->comment(ctxt->userData, buf);
4701 			else
4702 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4703 		    }
4704 		    if (buf != NULL)
4705 		        xmlFree(buf);
4706 		    if (ctxt->instate != XML_PARSER_EOF)
4707 			ctxt->instate = state;
4708 		    return;
4709 		}
4710 		if (buf != NULL)
4711 		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4712 		                      "Comment not terminated \n<!--%.50s\n",
4713 				      buf);
4714 		else
4715 		    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4716 		                      "Comment not terminated \n", NULL);
4717 		in++;
4718 		ctxt->input->col++;
4719 	    }
4720 	    in++;
4721 	    ctxt->input->col++;
4722 	    goto get_more;
4723 	}
4724     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4725     xmlParseCommentComplex(ctxt, buf, len, size);
4726     ctxt->instate = state;
4727     return;
4728 }
4729 
4730 
4731 /**
4732  * xmlParsePITarget:
4733  * @ctxt:  an XML parser context
4734  *
4735  * parse the name of a PI
4736  *
4737  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4738  *
4739  * Returns the PITarget name or NULL
4740  */
4741 
4742 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4743 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4744     const xmlChar *name;
4745 
4746     name = xmlParseName(ctxt);
4747     if ((name != NULL) &&
4748         ((name[0] == 'x') || (name[0] == 'X')) &&
4749         ((name[1] == 'm') || (name[1] == 'M')) &&
4750         ((name[2] == 'l') || (name[2] == 'L'))) {
4751 	int i;
4752 	if ((name[0] == 'x') && (name[1] == 'm') &&
4753 	    (name[2] == 'l') && (name[3] == 0)) {
4754 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4755 		 "XML declaration allowed only at the start of the document\n");
4756 	    return(name);
4757 	} else if (name[3] == 0) {
4758 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4759 	    return(name);
4760 	}
4761 	for (i = 0;;i++) {
4762 	    if (xmlW3CPIs[i] == NULL) break;
4763 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4764 	        return(name);
4765 	}
4766 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4767 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
4768 		      NULL, NULL);
4769     }
4770     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4771 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
4772 		 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4773     }
4774     return(name);
4775 }
4776 
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must look like: catalog = "URL" or catalog = 'URL', with
 * optional blanks around each token and nothing after the closing quote.
 * Syntax errors produce an XML_WAR_CATALOG_PI warning, except a missing
 * '=' after "catalog" which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* The opening quote is also the expected closing one. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4838 
4839 /**
4840  * xmlParsePI:
4841  * @ctxt:  an XML parser context
4842  *
4843  * parse an XML Processing Instruction.
4844  *
4845  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4846  *
4847  * The processing is transfered to SAX once parsed.
4848  */
4849 
4850 void
xmlParsePI(xmlParserCtxtPtr ctxt)4851 xmlParsePI(xmlParserCtxtPtr ctxt) {
4852     xmlChar *buf = NULL;
4853     int len = 0;
4854     int size = XML_PARSER_BUFFER_SIZE;
4855     int cur, l;
4856     const xmlChar *target;
4857     xmlParserInputState state;
4858     int count = 0;
4859 
4860     if ((RAW == '<') && (NXT(1) == '?')) {
4861 	xmlParserInputPtr input = ctxt->input;
4862 	state = ctxt->instate;
4863         ctxt->instate = XML_PARSER_PI;
4864 	/*
4865 	 * this is a Processing Instruction.
4866 	 */
4867 	SKIP(2);
4868 	SHRINK;
4869 
4870 	/*
4871 	 * Parse the target name and check for special support like
4872 	 * namespace.
4873 	 */
4874         target = xmlParsePITarget(ctxt);
4875 	if (target != NULL) {
4876 	    if ((RAW == '?') && (NXT(1) == '>')) {
4877 		if (input != ctxt->input) {
4878 		    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4879 	    "PI declaration doesn't start and stop in the same entity\n");
4880 		}
4881 		SKIP(2);
4882 
4883 		/*
4884 		 * SAX: PI detected.
4885 		 */
4886 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4887 		    (ctxt->sax->processingInstruction != NULL))
4888 		    ctxt->sax->processingInstruction(ctxt->userData,
4889 		                                     target, NULL);
4890 		if (ctxt->instate != XML_PARSER_EOF)
4891 		    ctxt->instate = state;
4892 		return;
4893 	    }
4894 	    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4895 	    if (buf == NULL) {
4896 		xmlErrMemory(ctxt, NULL);
4897 		ctxt->instate = state;
4898 		return;
4899 	    }
4900 	    cur = CUR;
4901 	    if (!IS_BLANK(cur)) {
4902 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4903 			  "ParsePI: PI %s space expected\n", target);
4904 	    }
4905             SKIP_BLANKS;
4906 	    cur = CUR_CHAR(l);
4907 	    while (IS_CHAR(cur) && /* checked */
4908 		   ((cur != '?') || (NXT(1) != '>'))) {
4909 		if (len + 5 >= size) {
4910 		    xmlChar *tmp;
4911 
4912 		    size *= 2;
4913 		    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4914 		    if (tmp == NULL) {
4915 			xmlErrMemory(ctxt, NULL);
4916 			xmlFree(buf);
4917 			ctxt->instate = state;
4918 			return;
4919 		    }
4920 		    buf = tmp;
4921 		}
4922 		count++;
4923 		if (count > 50) {
4924 		    GROW;
4925                     if (ctxt->instate == XML_PARSER_EOF) {
4926                         xmlFree(buf);
4927                         return;
4928                     }
4929 		    count = 0;
4930 		}
4931 		COPY_BUF(l,buf,len,cur);
4932 		NEXTL(l);
4933 		cur = CUR_CHAR(l);
4934 		if (cur == 0) {
4935 		    SHRINK;
4936 		    GROW;
4937 		    cur = CUR_CHAR(l);
4938 		}
4939 	    }
4940 	    buf[len] = 0;
4941 	    if (cur != '?') {
4942 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4943 		      "ParsePI: PI %s never end ...\n", target);
4944 	    } else {
4945 		if (input != ctxt->input) {
4946 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 	    "PI declaration doesn't start and stop in the same entity\n");
4948 		}
4949 		SKIP(2);
4950 
4951 #ifdef LIBXML_CATALOG_ENABLED
4952 		if (((state == XML_PARSER_MISC) ||
4953 	             (state == XML_PARSER_START)) &&
4954 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
4955 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
4956 		    if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4957 			(allow == XML_CATA_ALLOW_ALL))
4958 			xmlParseCatalogPI(ctxt, buf);
4959 		}
4960 #endif
4961 
4962 
4963 		/*
4964 		 * SAX: PI detected.
4965 		 */
4966 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
4967 		    (ctxt->sax->processingInstruction != NULL))
4968 		    ctxt->sax->processingInstruction(ctxt->userData,
4969 		                                     target, buf);
4970 	    }
4971 	    xmlFree(buf);
4972 	} else {
4973 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4974 	}
4975 	if (ctxt->instate != XML_PARSER_EOF)
4976 	    ctxt->instate = state;
4977     }
4978 }
4979 
4980 /**
4981  * xmlParseNotationDecl:
4982  * @ctxt:  an XML parser context
4983  *
4984  * parse a notation declaration
4985  *
4986  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
4987  *
4988  * Hence there is actually 3 choices:
4989  *     'PUBLIC' S PubidLiteral
4990  *     'PUBLIC' S PubidLiteral S SystemLiteral
4991  * and 'SYSTEM' S SystemLiteral
4992  *
4993  * See the NOTE on xmlParseExternalID().
4994  */
4995 
4996 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)4997 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4998     const xmlChar *name;
4999     xmlChar *Pubid;
5000     xmlChar *Systemid;
5001 
5002     if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5003 	xmlParserInputPtr input = ctxt->input;
5004 	SHRINK;
5005 	SKIP(10);
5006 	if (!IS_BLANK_CH(CUR)) {
5007 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5008 			   "Space required after '<!NOTATION'\n");
5009 	    return;
5010 	}
5011 	SKIP_BLANKS;
5012 
5013         name = xmlParseName(ctxt);
5014 	if (name == NULL) {
5015 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5016 	    return;
5017 	}
5018 	if (!IS_BLANK_CH(CUR)) {
5019 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5020 		     "Space required after the NOTATION name'\n");
5021 	    return;
5022 	}
5023 	if (xmlStrchr(name, ':') != NULL) {
5024 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5025 		     "colon are forbidden from notation names '%s'\n",
5026 		     name, NULL, NULL);
5027 	}
5028 	SKIP_BLANKS;
5029 
5030 	/*
5031 	 * Parse the IDs.
5032 	 */
5033 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5034 	SKIP_BLANKS;
5035 
5036 	if (RAW == '>') {
5037 	    if (input != ctxt->input) {
5038 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5039 	"Notation declaration doesn't start and stop in the same entity\n");
5040 	    }
5041 	    NEXT;
5042 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5043 		(ctxt->sax->notationDecl != NULL))
5044 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5045 	} else {
5046 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5047 	}
5048 	if (Systemid != NULL) xmlFree(Systemid);
5049 	if (Pubid != NULL) xmlFree(Pubid);
5050     }
5051 }
5052 
5053 /**
5054  * xmlParseEntityDecl:
5055  * @ctxt:  an XML parser context
5056  *
5057  * parse <!ENTITY declarations
5058  *
5059  * [70] EntityDecl ::= GEDecl | PEDecl
5060  *
5061  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5062  *
5063  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5064  *
5065  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5066  *
5067  * [74] PEDef ::= EntityValue | ExternalID
5068  *
5069  * [76] NDataDecl ::= S 'NDATA' S Name
5070  *
5071  * [ VC: Notation Declared ]
5072  * The Name must match the declared name of a notation.
5073  */
5074 
5075 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5076 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5077     const xmlChar *name = NULL;
5078     xmlChar *value = NULL;
5079     xmlChar *URI = NULL, *literal = NULL;
5080     const xmlChar *ndata = NULL;
5081     int isParameter = 0;
5082     xmlChar *orig = NULL;
5083     int skipped;
5084 
5085     /* GROW; done in the caller */
5086     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5087 	xmlParserInputPtr input = ctxt->input;
5088 	SHRINK;
5089 	SKIP(8);
5090 	skipped = SKIP_BLANKS;
5091 	if (skipped == 0) {
5092 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5093 			   "Space required after '<!ENTITY'\n");
5094 	}
5095 
5096 	if (RAW == '%') {
5097 	    NEXT;
5098 	    skipped = SKIP_BLANKS;
5099 	    if (skipped == 0) {
5100 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5101 			       "Space required after '%'\n");
5102 	    }
5103 	    isParameter = 1;
5104 	}
5105 
5106         name = xmlParseName(ctxt);
5107 	if (name == NULL) {
5108 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5109 	                   "xmlParseEntityDecl: no name\n");
5110             return;
5111 	}
5112 	if (xmlStrchr(name, ':') != NULL) {
5113 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5114 		     "colon are forbidden from entities names '%s'\n",
5115 		     name, NULL, NULL);
5116 	}
5117         skipped = SKIP_BLANKS;
5118 	if (skipped == 0) {
5119 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5120 			   "Space required after the entity name\n");
5121 	}
5122 
5123 	ctxt->instate = XML_PARSER_ENTITY_DECL;
5124 	/*
5125 	 * handle the various case of definitions...
5126 	 */
5127 	if (isParameter) {
5128 	    if ((RAW == '"') || (RAW == '\'')) {
5129 	        value = xmlParseEntityValue(ctxt, &orig);
5130 		if (value) {
5131 		    if ((ctxt->sax != NULL) &&
5132 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5133 			ctxt->sax->entityDecl(ctxt->userData, name,
5134 		                    XML_INTERNAL_PARAMETER_ENTITY,
5135 				    NULL, NULL, value);
5136 		}
5137 	    } else {
5138 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5139 		if ((URI == NULL) && (literal == NULL)) {
5140 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5141 		}
5142 		if (URI) {
5143 		    xmlURIPtr uri;
5144 
5145 		    uri = xmlParseURI((const char *) URI);
5146 		    if (uri == NULL) {
5147 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5148 				     "Invalid URI: %s\n", URI);
5149 			/*
5150 			 * This really ought to be a well formedness error
5151 			 * but the XML Core WG decided otherwise c.f. issue
5152 			 * E26 of the XML erratas.
5153 			 */
5154 		    } else {
5155 			if (uri->fragment != NULL) {
5156 			    /*
5157 			     * Okay this is foolish to block those but not
5158 			     * invalid URIs.
5159 			     */
5160 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5161 			} else {
5162 			    if ((ctxt->sax != NULL) &&
5163 				(!ctxt->disableSAX) &&
5164 				(ctxt->sax->entityDecl != NULL))
5165 				ctxt->sax->entityDecl(ctxt->userData, name,
5166 					    XML_EXTERNAL_PARAMETER_ENTITY,
5167 					    literal, URI, NULL);
5168 			}
5169 			xmlFreeURI(uri);
5170 		    }
5171 		}
5172 	    }
5173 	} else {
5174 	    if ((RAW == '"') || (RAW == '\'')) {
5175 	        value = xmlParseEntityValue(ctxt, &orig);
5176 		if ((ctxt->sax != NULL) &&
5177 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5178 		    ctxt->sax->entityDecl(ctxt->userData, name,
5179 				XML_INTERNAL_GENERAL_ENTITY,
5180 				NULL, NULL, value);
5181 		/*
5182 		 * For expat compatibility in SAX mode.
5183 		 */
5184 		if ((ctxt->myDoc == NULL) ||
5185 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5186 		    if (ctxt->myDoc == NULL) {
5187 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5188 			if (ctxt->myDoc == NULL) {
5189 			    xmlErrMemory(ctxt, "New Doc failed");
5190 			    return;
5191 			}
5192 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5193 		    }
5194 		    if (ctxt->myDoc->intSubset == NULL)
5195 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5196 					    BAD_CAST "fake", NULL, NULL);
5197 
5198 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5199 			              NULL, NULL, value);
5200 		}
5201 	    } else {
5202 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5203 		if ((URI == NULL) && (literal == NULL)) {
5204 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5205 		}
5206 		if (URI) {
5207 		    xmlURIPtr uri;
5208 
5209 		    uri = xmlParseURI((const char *)URI);
5210 		    if (uri == NULL) {
5211 		        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5212 				     "Invalid URI: %s\n", URI);
5213 			/*
5214 			 * This really ought to be a well formedness error
5215 			 * but the XML Core WG decided otherwise c.f. issue
5216 			 * E26 of the XML erratas.
5217 			 */
5218 		    } else {
5219 			if (uri->fragment != NULL) {
5220 			    /*
5221 			     * Okay this is foolish to block those but not
5222 			     * invalid URIs.
5223 			     */
5224 			    xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5225 			}
5226 			xmlFreeURI(uri);
5227 		    }
5228 		}
5229 		if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5230 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5231 				   "Space required before 'NDATA'\n");
5232 		}
5233 		SKIP_BLANKS;
5234 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5235 		    SKIP(5);
5236 		    if (!IS_BLANK_CH(CUR)) {
5237 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5238 				       "Space required after 'NDATA'\n");
5239 		    }
5240 		    SKIP_BLANKS;
5241 		    ndata = xmlParseName(ctxt);
5242 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5243 		        (ctxt->sax->unparsedEntityDecl != NULL))
5244 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5245 				    literal, URI, ndata);
5246 		} else {
5247 		    if ((ctxt->sax != NULL) &&
5248 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5249 			ctxt->sax->entityDecl(ctxt->userData, name,
5250 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5251 				    literal, URI, NULL);
5252 		    /*
5253 		     * For expat compatibility in SAX mode.
5254 		     * assuming the entity repalcement was asked for
5255 		     */
5256 		    if ((ctxt->replaceEntities != 0) &&
5257 			((ctxt->myDoc == NULL) ||
5258 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5259 			if (ctxt->myDoc == NULL) {
5260 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5261 			    if (ctxt->myDoc == NULL) {
5262 			        xmlErrMemory(ctxt, "New Doc failed");
5263 				return;
5264 			    }
5265 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5266 			}
5267 
5268 			if (ctxt->myDoc->intSubset == NULL)
5269 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5270 						BAD_CAST "fake", NULL, NULL);
5271 			xmlSAX2EntityDecl(ctxt, name,
5272 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5273 				          literal, URI, NULL);
5274 		    }
5275 		}
5276 	    }
5277 	}
5278 	if (ctxt->instate == XML_PARSER_EOF)
5279 	    return;
5280 	SKIP_BLANKS;
5281 	if (RAW != '>') {
5282 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5283 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5284 	} else {
5285 	    if (input != ctxt->input) {
5286 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5287 	"Entity declaration doesn't start and stop in the same entity\n");
5288 	    }
5289 	    NEXT;
5290 	}
5291 	if (orig != NULL) {
5292 	    /*
5293 	     * Ugly mechanism to save the raw entity value.
5294 	     */
5295 	    xmlEntityPtr cur = NULL;
5296 
5297 	    if (isParameter) {
5298 	        if ((ctxt->sax != NULL) &&
5299 		    (ctxt->sax->getParameterEntity != NULL))
5300 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5301 	    } else {
5302 	        if ((ctxt->sax != NULL) &&
5303 		    (ctxt->sax->getEntity != NULL))
5304 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5305 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5306 		    cur = xmlSAX2GetEntity(ctxt, name);
5307 		}
5308 	    }
5309             if (cur != NULL) {
5310 	        if (cur->orig != NULL)
5311 		    xmlFree(orig);
5312 		else
5313 		    cur->orig = orig;
5314 	    } else
5315 		xmlFree(orig);
5316 	}
5317 	if (value != NULL) xmlFree(value);
5318 	if (URI != NULL) xmlFree(URI);
5319 	if (literal != NULL) xmlFree(literal);
5320     }
5321 }
5322 
5323 /**
5324  * xmlParseDefaultDecl:
5325  * @ctxt:  an XML parser context
5326  * @value:  Receive a possible fixed default value for the attribute
5327  *
5328  * Parse an attribute default declaration
5329  *
5330  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5331  *
5332  * [ VC: Required Attribute ]
5333  * if the default declaration is the keyword #REQUIRED, then the
5334  * attribute must be specified for all elements of the type in the
5335  * attribute-list declaration.
5336  *
5337  * [ VC: Attribute Default Legal ]
5338  * The declared default value must meet the lexical constraints of
5339  * the declared attribute type c.f. xmlValidateAttributeDecl()
5340  *
5341  * [ VC: Fixed Attribute Default ]
5342  * if an attribute has a default value declared with the #FIXED
5343  * keyword, instances of that attribute must match the default value.
5344  *
5345  * [ WFC: No < in Attribute Values ]
5346  * handled in xmlParseAttValue()
5347  *
5348  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5349  *          or XML_ATTRIBUTE_FIXED.
5350  */
5351 
5352 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5353 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5354     int val;
5355     xmlChar *ret;
5356 
5357     *value = NULL;
5358     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5359 	SKIP(9);
5360 	return(XML_ATTRIBUTE_REQUIRED);
5361     }
5362     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5363 	SKIP(8);
5364 	return(XML_ATTRIBUTE_IMPLIED);
5365     }
5366     val = XML_ATTRIBUTE_NONE;
5367     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5368 	SKIP(6);
5369 	val = XML_ATTRIBUTE_FIXED;
5370 	if (!IS_BLANK_CH(CUR)) {
5371 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 			   "Space required after '#FIXED'\n");
5373 	}
5374 	SKIP_BLANKS;
5375     }
5376     ret = xmlParseAttValue(ctxt);
5377     ctxt->instate = XML_PARSER_DTD;
5378     if (ret == NULL) {
5379 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5380 		       "Attribute default value declaration error\n");
5381     } else
5382         *value = ret;
5383     return(val);
5384 }
5385 
5386 /**
5387  * xmlParseNotationType:
5388  * @ctxt:  an XML parser context
5389  *
5390  * parse an Notation attribute type.
5391  *
5392  * Note: the leading 'NOTATION' S part has already being parsed...
5393  *
5394  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5395  *
5396  * [ VC: Notation Attributes ]
5397  * Values of this type must match one of the notation names included
5398  * in the declaration; all notation names in the declaration must be declared.
5399  *
5400  * Returns: the notation attribute tree built while parsing
5401  */
5402 
5403 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5404 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5405     const xmlChar *name;
5406     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5407 
5408     if (RAW != '(') {
5409 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5410 	return(NULL);
5411     }
5412     SHRINK;
5413     do {
5414         NEXT;
5415 	SKIP_BLANKS;
5416         name = xmlParseName(ctxt);
5417 	if (name == NULL) {
5418 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5419 			   "Name expected in NOTATION declaration\n");
5420             xmlFreeEnumeration(ret);
5421 	    return(NULL);
5422 	}
5423 	tmp = ret;
5424 	while (tmp != NULL) {
5425 	    if (xmlStrEqual(name, tmp->name)) {
5426 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5427 	  "standalone: attribute notation value token %s duplicated\n",
5428 				 name, NULL);
5429 		if (!xmlDictOwns(ctxt->dict, name))
5430 		    xmlFree((xmlChar *) name);
5431 		break;
5432 	    }
5433 	    tmp = tmp->next;
5434 	}
5435 	if (tmp == NULL) {
5436 	    cur = xmlCreateEnumeration(name);
5437 	    if (cur == NULL) {
5438                 xmlFreeEnumeration(ret);
5439                 return(NULL);
5440             }
5441 	    if (last == NULL) ret = last = cur;
5442 	    else {
5443 		last->next = cur;
5444 		last = cur;
5445 	    }
5446 	}
5447 	SKIP_BLANKS;
5448     } while (RAW == '|');
5449     if (RAW != ')') {
5450 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5451         xmlFreeEnumeration(ret);
5452 	return(NULL);
5453     }
5454     NEXT;
5455     return(ret);
5456 }
5457 
5458 /**
5459  * xmlParseEnumerationType:
5460  * @ctxt:  an XML parser context
5461  *
5462  * parse an Enumeration attribute type.
5463  *
5464  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5465  *
5466  * [ VC: Enumeration ]
5467  * Values of this type must match one of the Nmtoken tokens in
5468  * the declaration
5469  *
5470  * Returns: the enumeration attribute tree built while parsing
5471  */
5472 
5473 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5474 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5475     xmlChar *name;
5476     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5477 
5478     if (RAW != '(') {
5479 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5480 	return(NULL);
5481     }
5482     SHRINK;
5483     do {
5484         NEXT;
5485 	SKIP_BLANKS;
5486         name = xmlParseNmtoken(ctxt);
5487 	if (name == NULL) {
5488 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5489 	    return(ret);
5490 	}
5491 	tmp = ret;
5492 	while (tmp != NULL) {
5493 	    if (xmlStrEqual(name, tmp->name)) {
5494 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5495 	  "standalone: attribute enumeration value token %s duplicated\n",
5496 				 name, NULL);
5497 		if (!xmlDictOwns(ctxt->dict, name))
5498 		    xmlFree(name);
5499 		break;
5500 	    }
5501 	    tmp = tmp->next;
5502 	}
5503 	if (tmp == NULL) {
5504 	    cur = xmlCreateEnumeration(name);
5505 	    if (!xmlDictOwns(ctxt->dict, name))
5506 		xmlFree(name);
5507 	    if (cur == NULL) {
5508                 xmlFreeEnumeration(ret);
5509                 return(NULL);
5510             }
5511 	    if (last == NULL) ret = last = cur;
5512 	    else {
5513 		last->next = cur;
5514 		last = cur;
5515 	    }
5516 	}
5517 	SKIP_BLANKS;
5518     } while (RAW == '|');
5519     if (RAW != ')') {
5520 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5521 	return(ret);
5522     }
5523     NEXT;
5524     return(ret);
5525 }
5526 
5527 /**
5528  * xmlParseEnumeratedType:
5529  * @ctxt:  an XML parser context
5530  * @tree:  the enumeration tree built while parsing
5531  *
5532  * parse an Enumerated attribute type.
5533  *
5534  * [57] EnumeratedType ::= NotationType | Enumeration
5535  *
5536  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5537  *
5538  *
5539  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5540  */
5541 
5542 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5543 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5544     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5545 	SKIP(8);
5546 	if (!IS_BLANK_CH(CUR)) {
5547 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5548 			   "Space required after 'NOTATION'\n");
5549 	    return(0);
5550 	}
5551         SKIP_BLANKS;
5552 	*tree = xmlParseNotationType(ctxt);
5553 	if (*tree == NULL) return(0);
5554 	return(XML_ATTRIBUTE_NOTATION);
5555     }
5556     *tree = xmlParseEnumerationType(ctxt);
5557     if (*tree == NULL) return(0);
5558     return(XML_ATTRIBUTE_ENUMERATION);
5559 }
5560 
5561 /**
5562  * xmlParseAttributeType:
5563  * @ctxt:  an XML parser context
5564  * @tree:  the enumeration tree built while parsing
5565  *
5566  * parse the Attribute list def for an element
5567  *
5568  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5569  *
5570  * [55] StringType ::= 'CDATA'
5571  *
5572  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5573  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5574  *
5575  * Validity constraints for attribute values syntax are checked in
5576  * xmlValidateAttributeValue()
5577  *
5578  * [ VC: ID ]
5579  * Values of type ID must match the Name production. A name must not
5580  * appear more than once in an XML document as a value of this type;
5581  * i.e., ID values must uniquely identify the elements which bear them.
5582  *
5583  * [ VC: One ID per Element Type ]
5584  * No element type may have more than one ID attribute specified.
5585  *
5586  * [ VC: ID Attribute Default ]
5587  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5588  *
5589  * [ VC: IDREF ]
5590  * Values of type IDREF must match the Name production, and values
5591  * of type IDREFS must match Names; each IDREF Name must match the value
5592  * of an ID attribute on some element in the XML document; i.e. IDREF
5593  * values must match the value of some ID attribute.
5594  *
5595  * [ VC: Entity Name ]
5596  * Values of type ENTITY must match the Name production, values
5597  * of type ENTITIES must match Names; each Entity Name must match the
5598  * name of an unparsed entity declared in the DTD.
5599  *
5600  * [ VC: Name Token ]
5601  * Values of type NMTOKEN must match the Nmtoken production; values
5602  * of type NMTOKENS must match Nmtokens.
5603  *
5604  * Returns the attribute type
5605  */
5606 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5607 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5608     SHRINK;
5609     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5610 	SKIP(5);
5611 	return(XML_ATTRIBUTE_CDATA);
5612      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5613 	SKIP(6);
5614 	return(XML_ATTRIBUTE_IDREFS);
5615      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5616 	SKIP(5);
5617 	return(XML_ATTRIBUTE_IDREF);
5618      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5619         SKIP(2);
5620 	return(XML_ATTRIBUTE_ID);
5621      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5622 	SKIP(6);
5623 	return(XML_ATTRIBUTE_ENTITY);
5624      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5625 	SKIP(8);
5626 	return(XML_ATTRIBUTE_ENTITIES);
5627      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5628 	SKIP(8);
5629 	return(XML_ATTRIBUTE_NMTOKENS);
5630      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5631 	SKIP(7);
5632 	return(XML_ATTRIBUTE_NMTOKEN);
5633      }
5634      return(xmlParseEnumeratedType(ctxt, tree));
5635 }
5636 
5637 /**
5638  * xmlParseAttributeListDecl:
5639  * @ctxt:  an XML parser context
5640  *
5641  * : parse the Attribute list def for an element
5642  *
5643  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5644  *
5645  * [53] AttDef ::= S Name S AttType S DefaultDecl
5646  *
5647  */
5648 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5649 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5650     const xmlChar *elemName;
5651     const xmlChar *attrName;
5652     xmlEnumerationPtr tree;
5653 
5654     if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5655 	xmlParserInputPtr input = ctxt->input;
5656 
5657 	SKIP(9);
5658 	if (!IS_BLANK_CH(CUR)) {
5659 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5660 		                 "Space required after '<!ATTLIST'\n");
5661 	}
5662         SKIP_BLANKS;
5663         elemName = xmlParseName(ctxt);
5664 	if (elemName == NULL) {
5665 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5666 			   "ATTLIST: no name for Element\n");
5667 	    return;
5668 	}
5669 	SKIP_BLANKS;
5670 	GROW;
5671 	while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5672 	    const xmlChar *check = CUR_PTR;
5673 	    int type;
5674 	    int def;
5675 	    xmlChar *defaultValue = NULL;
5676 
5677 	    GROW;
5678             tree = NULL;
5679 	    attrName = xmlParseName(ctxt);
5680 	    if (attrName == NULL) {
5681 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5682 			       "ATTLIST: no name for Attribute\n");
5683 		break;
5684 	    }
5685 	    GROW;
5686 	    if (!IS_BLANK_CH(CUR)) {
5687 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5688 		        "Space required after the attribute name\n");
5689 		break;
5690 	    }
5691 	    SKIP_BLANKS;
5692 
5693 	    type = xmlParseAttributeType(ctxt, &tree);
5694 	    if (type <= 0) {
5695 	        break;
5696 	    }
5697 
5698 	    GROW;
5699 	    if (!IS_BLANK_CH(CUR)) {
5700 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 			       "Space required after the attribute type\n");
5702 	        if (tree != NULL)
5703 		    xmlFreeEnumeration(tree);
5704 		break;
5705 	    }
5706 	    SKIP_BLANKS;
5707 
5708 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
5709 	    if (def <= 0) {
5710                 if (defaultValue != NULL)
5711 		    xmlFree(defaultValue);
5712 	        if (tree != NULL)
5713 		    xmlFreeEnumeration(tree);
5714 	        break;
5715 	    }
5716 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5717 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
5718 
5719 	    GROW;
5720             if (RAW != '>') {
5721 		if (!IS_BLANK_CH(CUR)) {
5722 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723 			"Space required after the attribute default value\n");
5724 		    if (defaultValue != NULL)
5725 			xmlFree(defaultValue);
5726 		    if (tree != NULL)
5727 			xmlFreeEnumeration(tree);
5728 		    break;
5729 		}
5730 		SKIP_BLANKS;
5731 	    }
5732 	    if (check == CUR_PTR) {
5733 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5734 		            "in xmlParseAttributeListDecl\n");
5735 		if (defaultValue != NULL)
5736 		    xmlFree(defaultValue);
5737 	        if (tree != NULL)
5738 		    xmlFreeEnumeration(tree);
5739 		break;
5740 	    }
5741 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5742 		(ctxt->sax->attributeDecl != NULL))
5743 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5744 	                        type, def, defaultValue, tree);
5745 	    else if (tree != NULL)
5746 		xmlFreeEnumeration(tree);
5747 
5748 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
5749 	        (def != XML_ATTRIBUTE_IMPLIED) &&
5750 		(def != XML_ATTRIBUTE_REQUIRED)) {
5751 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5752 	    }
5753 	    if (ctxt->sax2) {
5754 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5755 	    }
5756 	    if (defaultValue != NULL)
5757 	        xmlFree(defaultValue);
5758 	    GROW;
5759 	}
5760 	if (RAW == '>') {
5761 	    if (input != ctxt->input) {
5762 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5763     "Attribute list declaration doesn't start and stop in the same entity\n",
5764                                  NULL, NULL);
5765 	    }
5766 	    NEXT;
5767 	}
5768     }
5769 }
5770 
5771 /**
5772  * xmlParseElementMixedContentDecl:
5773  * @ctxt:  an XML parser context
5774  * @inputchk:  the input used for the current entity, needed for boundary checks
5775  *
5776  * parse the declaration for a Mixed Element content
5777  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5778  *
5779  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5780  *                '(' S? '#PCDATA' S? ')'
5781  *
5782  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5783  *
5784  * [ VC: No Duplicate Types ]
5785  * The same name must not appear more than once in a single
5786  * mixed-content declaration.
5787  *
5788  * returns: the list of the xmlElementContentPtr describing the element choices
5789  */
5790 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5791 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5792     xmlElementContentPtr ret = NULL, cur = NULL, n;
5793     const xmlChar *elem = NULL;
5794 
5795     GROW;
5796     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5797 	SKIP(7);
5798 	SKIP_BLANKS;
5799 	SHRINK;
5800 	if (RAW == ')') {
5801 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5802 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5803 "Element content declaration doesn't start and stop in the same entity\n",
5804                                  NULL, NULL);
5805 	    }
5806 	    NEXT;
5807 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5808 	    if (ret == NULL)
5809 	        return(NULL);
5810 	    if (RAW == '*') {
5811 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
5812 		NEXT;
5813 	    }
5814 	    return(ret);
5815 	}
5816 	if ((RAW == '(') || (RAW == '|')) {
5817 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5818 	    if (ret == NULL) return(NULL);
5819 	}
5820 	while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
5821 	    NEXT;
5822 	    if (elem == NULL) {
5823 	        ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5824 		if (ret == NULL) return(NULL);
5825 		ret->c1 = cur;
5826 		if (cur != NULL)
5827 		    cur->parent = ret;
5828 		cur = ret;
5829 	    } else {
5830 	        n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5831 		if (n == NULL) return(NULL);
5832 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5833 		if (n->c1 != NULL)
5834 		    n->c1->parent = n;
5835 	        cur->c2 = n;
5836 		if (n != NULL)
5837 		    n->parent = cur;
5838 		cur = n;
5839 	    }
5840 	    SKIP_BLANKS;
5841 	    elem = xmlParseName(ctxt);
5842 	    if (elem == NULL) {
5843 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5844 			"xmlParseElementMixedContentDecl : Name expected\n");
5845 		xmlFreeDocElementContent(ctxt->myDoc, cur);
5846 		return(NULL);
5847 	    }
5848 	    SKIP_BLANKS;
5849 	    GROW;
5850 	}
5851 	if ((RAW == ')') && (NXT(1) == '*')) {
5852 	    if (elem != NULL) {
5853 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5854 		                               XML_ELEMENT_CONTENT_ELEMENT);
5855 		if (cur->c2 != NULL)
5856 		    cur->c2->parent = cur;
5857             }
5858             if (ret != NULL)
5859                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5860 	    if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5861 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5862 "Element content declaration doesn't start and stop in the same entity\n",
5863 				 NULL, NULL);
5864 	    }
5865 	    SKIP(2);
5866 	} else {
5867 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
5868 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5869 	    return(NULL);
5870 	}
5871 
5872     } else {
5873 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5874     }
5875     return(ret);
5876 }
5877 
5878 /**
5879  * xmlParseElementChildrenContentDeclPriv:
5880  * @ctxt:  an XML parser context
5881  * @inputchk:  the input used for the current entity, needed for boundary checks
5882  * @depth: the level of recursion
5883  *
5884  * parse the declaration for a Mixed Element content
5885  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5886  *
5887  *
5888  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5889  *
5890  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5891  *
5892  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5893  *
5894  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5895  *
5896  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5897  * TODO Parameter-entity replacement text must be properly nested
5898  *	with parenthesized groups. That is to say, if either of the
5899  *	opening or closing parentheses in a choice, seq, or Mixed
5900  *	construct is contained in the replacement text for a parameter
5901  *	entity, both must be contained in the same replacement text. For
5902  *	interoperability, if a parameter-entity reference appears in a
5903  *	choice, seq, or Mixed construct, its replacement text should not
5904  *	be empty, and neither the first nor last non-blank character of
5905  *	the replacement text should be a connector (| or ,).
5906  *
5907  * Returns the tree of xmlElementContentPtr describing the element
5908  *          hierarchy.
5909  */
5910 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)5911 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5912                                        int depth) {
5913     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5914     const xmlChar *elem;
5915     xmlChar type = 0;
5916 
5917     if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5918         (depth >  2048)) {
5919         xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5920 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5921                           depth);
5922 	return(NULL);
5923     }
5924     SKIP_BLANKS;
5925     GROW;
5926     if (RAW == '(') {
5927 	int inputid = ctxt->input->id;
5928 
5929         /* Recurse on first child */
5930 	NEXT;
5931 	SKIP_BLANKS;
5932         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5933                                                            depth + 1);
5934 	SKIP_BLANKS;
5935 	GROW;
5936     } else {
5937 	elem = xmlParseName(ctxt);
5938 	if (elem == NULL) {
5939 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5940 	    return(NULL);
5941 	}
5942         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5943 	if (cur == NULL) {
5944 	    xmlErrMemory(ctxt, NULL);
5945 	    return(NULL);
5946 	}
5947 	GROW;
5948 	if (RAW == '?') {
5949 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
5950 	    NEXT;
5951 	} else if (RAW == '*') {
5952 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
5953 	    NEXT;
5954 	} else if (RAW == '+') {
5955 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5956 	    NEXT;
5957 	} else {
5958 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5959 	}
5960 	GROW;
5961     }
5962     SKIP_BLANKS;
5963     SHRINK;
5964     while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
5965         /*
5966 	 * Each loop we parse one separator and one element.
5967 	 */
5968         if (RAW == ',') {
5969 	    if (type == 0) type = CUR;
5970 
5971 	    /*
5972 	     * Detect "Name | Name , Name" error
5973 	     */
5974 	    else if (type != CUR) {
5975 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5976 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
5977 		                  type);
5978 		if ((last != NULL) && (last != ret))
5979 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5980 		if (ret != NULL)
5981 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
5982 		return(NULL);
5983 	    }
5984 	    NEXT;
5985 
5986 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5987 	    if (op == NULL) {
5988 		if ((last != NULL) && (last != ret))
5989 		    xmlFreeDocElementContent(ctxt->myDoc, last);
5990 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
5991 		return(NULL);
5992 	    }
5993 	    if (last == NULL) {
5994 		op->c1 = ret;
5995 		if (ret != NULL)
5996 		    ret->parent = op;
5997 		ret = cur = op;
5998 	    } else {
5999 	        cur->c2 = op;
6000 		if (op != NULL)
6001 		    op->parent = cur;
6002 		op->c1 = last;
6003 		if (last != NULL)
6004 		    last->parent = op;
6005 		cur =op;
6006 		last = NULL;
6007 	    }
6008 	} else if (RAW == '|') {
6009 	    if (type == 0) type = CUR;
6010 
6011 	    /*
6012 	     * Detect "Name , Name | Name" error
6013 	     */
6014 	    else if (type != CUR) {
6015 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6016 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6017 				  type);
6018 		if ((last != NULL) && (last != ret))
6019 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6020 		if (ret != NULL)
6021 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6022 		return(NULL);
6023 	    }
6024 	    NEXT;
6025 
6026 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6027 	    if (op == NULL) {
6028 		if ((last != NULL) && (last != ret))
6029 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6030 		if (ret != NULL)
6031 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6032 		return(NULL);
6033 	    }
6034 	    if (last == NULL) {
6035 		op->c1 = ret;
6036 		if (ret != NULL)
6037 		    ret->parent = op;
6038 		ret = cur = op;
6039 	    } else {
6040 	        cur->c2 = op;
6041 		if (op != NULL)
6042 		    op->parent = cur;
6043 		op->c1 = last;
6044 		if (last != NULL)
6045 		    last->parent = op;
6046 		cur =op;
6047 		last = NULL;
6048 	    }
6049 	} else {
6050 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6051 	    if ((last != NULL) && (last != ret))
6052 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6053 	    if (ret != NULL)
6054 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6055 	    return(NULL);
6056 	}
6057 	GROW;
6058 	SKIP_BLANKS;
6059 	GROW;
6060 	if (RAW == '(') {
6061 	    int inputid = ctxt->input->id;
6062 	    /* Recurse on second child */
6063 	    NEXT;
6064 	    SKIP_BLANKS;
6065 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6066                                                           depth + 1);
6067 	    SKIP_BLANKS;
6068 	} else {
6069 	    elem = xmlParseName(ctxt);
6070 	    if (elem == NULL) {
6071 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6072 		if (ret != NULL)
6073 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6074 		return(NULL);
6075 	    }
6076 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6077 	    if (last == NULL) {
6078 		if (ret != NULL)
6079 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6080 		return(NULL);
6081 	    }
6082 	    if (RAW == '?') {
6083 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6084 		NEXT;
6085 	    } else if (RAW == '*') {
6086 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6087 		NEXT;
6088 	    } else if (RAW == '+') {
6089 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6090 		NEXT;
6091 	    } else {
6092 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6093 	    }
6094 	}
6095 	SKIP_BLANKS;
6096 	GROW;
6097     }
6098     if ((cur != NULL) && (last != NULL)) {
6099         cur->c2 = last;
6100 	if (last != NULL)
6101 	    last->parent = cur;
6102     }
6103     if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6104 	xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105 "Element content declaration doesn't start and stop in the same entity\n",
6106 			 NULL, NULL);
6107     }
6108     NEXT;
6109     if (RAW == '?') {
6110 	if (ret != NULL) {
6111 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6112 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6113 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6114 	    else
6115 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6116 	}
6117 	NEXT;
6118     } else if (RAW == '*') {
6119 	if (ret != NULL) {
6120 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6121 	    cur = ret;
6122 	    /*
6123 	     * Some normalization:
6124 	     * (a | b* | c?)* == (a | b | c)*
6125 	     */
6126 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6127 		if ((cur->c1 != NULL) &&
6128 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6129 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6130 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6131 		if ((cur->c2 != NULL) &&
6132 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6133 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6134 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6135 		cur = cur->c2;
6136 	    }
6137 	}
6138 	NEXT;
6139     } else if (RAW == '+') {
6140 	if (ret != NULL) {
6141 	    int found = 0;
6142 
6143 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6144 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6145 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6146 	    else
6147 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6148 	    /*
6149 	     * Some normalization:
6150 	     * (a | b*)+ == (a | b)*
6151 	     * (a | b?)+ == (a | b)*
6152 	     */
6153 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6154 		if ((cur->c1 != NULL) &&
6155 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6156 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6157 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6158 		    found = 1;
6159 		}
6160 		if ((cur->c2 != NULL) &&
6161 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6162 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6163 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6164 		    found = 1;
6165 		}
6166 		cur = cur->c2;
6167 	    }
6168 	    if (found)
6169 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6170 	}
6171 	NEXT;
6172     }
6173     return(ret);
6174 }
6175 
6176 /**
6177  * xmlParseElementChildrenContentDecl:
6178  * @ctxt:  an XML parser context
6179  * @inputchk:  the input used for the current entity, needed for boundary checks
6180  *
6181  * parse the declaration for a Mixed Element content
6182  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6183  *
6184  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6185  *
6186  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6187  *
6188  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6189  *
6190  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6191  *
6192  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6193  * TODO Parameter-entity replacement text must be properly nested
6194  *	with parenthesized groups. That is to say, if either of the
6195  *	opening or closing parentheses in a choice, seq, or Mixed
6196  *	construct is contained in the replacement text for a parameter
6197  *	entity, both must be contained in the same replacement text. For
6198  *	interoperability, if a parameter-entity reference appears in a
6199  *	choice, seq, or Mixed construct, its replacement text should not
6200  *	be empty, and neither the first nor last non-blank character of
6201  *	the replacement text should be a connector (| or ,).
6202  *
6203  * Returns the tree of xmlElementContentPtr describing the element
6204  *          hierarchy.
6205  */
6206 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6207 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6208     /* stub left for API/ABI compat */
6209     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6210 }
6211 
6212 /**
6213  * xmlParseElementContentDecl:
6214  * @ctxt:  an XML parser context
6215  * @name:  the name of the element being defined.
6216  * @result:  the Element Content pointer will be stored here if any
6217  *
6218  * parse the declaration for an Element content either Mixed or Children,
6219  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6220  *
6221  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6222  *
6223  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6224  */
6225 
6226 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6227 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6228                            xmlElementContentPtr *result) {
6229 
6230     xmlElementContentPtr tree = NULL;
6231     int inputid = ctxt->input->id;
6232     int res;
6233 
6234     *result = NULL;
6235 
6236     if (RAW != '(') {
6237 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6238 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6239 	return(-1);
6240     }
6241     NEXT;
6242     GROW;
6243     if (ctxt->instate == XML_PARSER_EOF)
6244         return(-1);
6245     SKIP_BLANKS;
6246     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6247         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6248 	res = XML_ELEMENT_TYPE_MIXED;
6249     } else {
6250         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6251 	res = XML_ELEMENT_TYPE_ELEMENT;
6252     }
6253     SKIP_BLANKS;
6254     *result = tree;
6255     return(res);
6256 }
6257 
6258 /**
6259  * xmlParseElementDecl:
6260  * @ctxt:  an XML parser context
6261  *
6262  * parse an Element declaration.
6263  *
6264  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6265  *
6266  * [ VC: Unique Element Type Declaration ]
6267  * No element type may be declared more than once
6268  *
6269  * Returns the type of the element, or -1 in case of error
6270  */
6271 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6272 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6273     const xmlChar *name;
6274     int ret = -1;
6275     xmlElementContentPtr content  = NULL;
6276 
6277     /* GROW; done in the caller */
6278     if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6279 	xmlParserInputPtr input = ctxt->input;
6280 
6281 	SKIP(9);
6282 	if (!IS_BLANK_CH(CUR)) {
6283 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6284 		           "Space required after 'ELEMENT'\n");
6285 	}
6286         SKIP_BLANKS;
6287         name = xmlParseName(ctxt);
6288 	if (name == NULL) {
6289 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6290 			   "xmlParseElementDecl: no name for Element\n");
6291 	    return(-1);
6292 	}
6293 	while ((RAW == 0) && (ctxt->inputNr > 1))
6294 	    xmlPopInput(ctxt);
6295 	if (!IS_BLANK_CH(CUR)) {
6296 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6297 			   "Space required after the element name\n");
6298 	}
6299         SKIP_BLANKS;
6300 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6301 	    SKIP(5);
6302 	    /*
6303 	     * Element must always be empty.
6304 	     */
6305 	    ret = XML_ELEMENT_TYPE_EMPTY;
6306 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6307 	           (NXT(2) == 'Y')) {
6308 	    SKIP(3);
6309 	    /*
6310 	     * Element is a generic container.
6311 	     */
6312 	    ret = XML_ELEMENT_TYPE_ANY;
6313 	} else if (RAW == '(') {
6314 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6315 	} else {
6316 	    /*
6317 	     * [ WFC: PEs in Internal Subset ] error handling.
6318 	     */
6319 	    if ((RAW == '%') && (ctxt->external == 0) &&
6320 	        (ctxt->inputNr == 1)) {
6321 		xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6322 	  "PEReference: forbidden within markup decl in internal subset\n");
6323 	    } else {
6324 		xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6325 		      "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6326             }
6327 	    return(-1);
6328 	}
6329 
6330 	SKIP_BLANKS;
6331 	/*
6332 	 * Pop-up of finished entities.
6333 	 */
6334 	while ((RAW == 0) && (ctxt->inputNr > 1))
6335 	    xmlPopInput(ctxt);
6336 	SKIP_BLANKS;
6337 
6338 	if (RAW != '>') {
6339 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6340 	    if (content != NULL) {
6341 		xmlFreeDocElementContent(ctxt->myDoc, content);
6342 	    }
6343 	} else {
6344 	    if (input != ctxt->input) {
6345 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346     "Element declaration doesn't start and stop in the same entity\n");
6347 	    }
6348 
6349 	    NEXT;
6350 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6351 		(ctxt->sax->elementDecl != NULL)) {
6352 		if (content != NULL)
6353 		    content->parent = NULL;
6354 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6355 		                       content);
6356 		if ((content != NULL) && (content->parent == NULL)) {
6357 		    /*
6358 		     * this is a trick: if xmlAddElementDecl is called,
6359 		     * instead of copying the full tree it is plugged directly
6360 		     * if called from the parser. Avoid duplicating the
6361 		     * interfaces or change the API/ABI
6362 		     */
6363 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6364 		}
6365 	    } else if (content != NULL) {
6366 		xmlFreeDocElementContent(ctxt->myDoc, content);
6367 	    }
6368 	}
6369     }
6370     return(ret);
6371 }
6372 
6373 /**
6374  * xmlParseConditionalSections
6375  * @ctxt:  an XML parser context
6376  *
6377  * [61] conditionalSect ::= includeSect | ignoreSect
6378  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6379  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6380  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6381  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6382  */
6383 
6384 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6385 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6386     int id = ctxt->input->id;
6387 
6388     SKIP(3);
6389     SKIP_BLANKS;
6390     if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6391 	SKIP(7);
6392 	SKIP_BLANKS;
6393 	if (RAW != '[') {
6394 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6395 	} else {
6396 	    if (ctxt->input->id != id) {
6397 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398 	    "All markup of the conditional section is not in the same entity\n",
6399 				     NULL, NULL);
6400 	    }
6401 	    NEXT;
6402 	}
6403 	if (xmlParserDebugEntities) {
6404 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6405 		xmlGenericError(xmlGenericErrorContext,
6406 			"%s(%d): ", ctxt->input->filename,
6407 			ctxt->input->line);
6408 	    xmlGenericError(xmlGenericErrorContext,
6409 		    "Entering INCLUDE Conditional Section\n");
6410 	}
6411 
6412 	while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6413 	        (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6414 	    const xmlChar *check = CUR_PTR;
6415 	    unsigned int cons = ctxt->input->consumed;
6416 
6417 	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6418 		xmlParseConditionalSections(ctxt);
6419 	    } else if (IS_BLANK_CH(CUR)) {
6420 		NEXT;
6421 	    } else if (RAW == '%') {
6422 		xmlParsePEReference(ctxt);
6423 	    } else
6424 		xmlParseMarkupDecl(ctxt);
6425 
6426 	    /*
6427 	     * Pop-up of finished entities.
6428 	     */
6429 	    while ((RAW == 0) && (ctxt->inputNr > 1))
6430 		xmlPopInput(ctxt);
6431 
6432 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6433 		xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6434 		break;
6435 	    }
6436 	}
6437 	if (xmlParserDebugEntities) {
6438 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6439 		xmlGenericError(xmlGenericErrorContext,
6440 			"%s(%d): ", ctxt->input->filename,
6441 			ctxt->input->line);
6442 	    xmlGenericError(xmlGenericErrorContext,
6443 		    "Leaving INCLUDE Conditional Section\n");
6444 	}
6445 
6446     } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6447 	int state;
6448 	xmlParserInputState instate;
6449 	int depth = 0;
6450 
6451 	SKIP(6);
6452 	SKIP_BLANKS;
6453 	if (RAW != '[') {
6454 	    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6455 	} else {
6456 	    if (ctxt->input->id != id) {
6457 		xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6458 	    "All markup of the conditional section is not in the same entity\n",
6459 				     NULL, NULL);
6460 	    }
6461 	    NEXT;
6462 	}
6463 	if (xmlParserDebugEntities) {
6464 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6465 		xmlGenericError(xmlGenericErrorContext,
6466 			"%s(%d): ", ctxt->input->filename,
6467 			ctxt->input->line);
6468 	    xmlGenericError(xmlGenericErrorContext,
6469 		    "Entering IGNORE Conditional Section\n");
6470 	}
6471 
6472 	/*
6473 	 * Parse up to the end of the conditional section
6474 	 * But disable SAX event generating DTD building in the meantime
6475 	 */
6476 	state = ctxt->disableSAX;
6477 	instate = ctxt->instate;
6478 	if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6479 	ctxt->instate = XML_PARSER_IGNORE;
6480 
6481 	while (((depth >= 0) && (RAW != 0)) &&
6482                (ctxt->instate != XML_PARSER_EOF)) {
6483 	  if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6484 	    depth++;
6485 	    SKIP(3);
6486 	    continue;
6487 	  }
6488 	  if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6489 	    if (--depth >= 0) SKIP(3);
6490 	    continue;
6491 	  }
6492 	  NEXT;
6493 	  continue;
6494 	}
6495 
6496 	ctxt->disableSAX = state;
6497 	ctxt->instate = instate;
6498 
6499 	if (xmlParserDebugEntities) {
6500 	    if ((ctxt->input != NULL) && (ctxt->input->filename))
6501 		xmlGenericError(xmlGenericErrorContext,
6502 			"%s(%d): ", ctxt->input->filename,
6503 			ctxt->input->line);
6504 	    xmlGenericError(xmlGenericErrorContext,
6505 		    "Leaving IGNORE Conditional Section\n");
6506 	}
6507 
6508     } else {
6509 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6510     }
6511 
6512     if (RAW == 0)
6513         SHRINK;
6514 
6515     if (RAW == 0) {
6516 	xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6517     } else {
6518 	if (ctxt->input->id != id) {
6519 	    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520 	"All markup of the conditional section is not in the same entity\n",
6521 				 NULL, NULL);
6522 	}
6523         SKIP(3);
6524     }
6525 }
6526 
6527 /**
6528  * xmlParseMarkupDecl:
6529  * @ctxt:  an XML parser context
6530  *
6531  * parse Markup declarations
6532  *
6533  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6534  *                     NotationDecl | PI | Comment
6535  *
6536  * [ VC: Proper Declaration/PE Nesting ]
6537  * Parameter-entity replacement text must be properly nested with
6538  * markup declarations. That is to say, if either the first character
6539  * or the last character of a markup declaration (markupdecl above) is
6540  * contained in the replacement text for a parameter-entity reference,
6541  * both must be contained in the same replacement text.
6542  *
6543  * [ WFC: PEs in Internal Subset ]
6544  * In the internal DTD subset, parameter-entity references can occur
6545  * only where markup declarations can occur, not within markup declarations.
6546  * (This does not apply to references that occur in external parameter
6547  * entities or to the external subset.)
6548  */
6549 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6550 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6551     GROW;
6552     if (CUR == '<') {
6553         if (NXT(1) == '!') {
6554 	    switch (NXT(2)) {
6555 	        case 'E':
6556 		    if (NXT(3) == 'L')
6557 			xmlParseElementDecl(ctxt);
6558 		    else if (NXT(3) == 'N')
6559 			xmlParseEntityDecl(ctxt);
6560 		    break;
6561 	        case 'A':
6562 		    xmlParseAttributeListDecl(ctxt);
6563 		    break;
6564 	        case 'N':
6565 		    xmlParseNotationDecl(ctxt);
6566 		    break;
6567 	        case '-':
6568 		    xmlParseComment(ctxt);
6569 		    break;
6570 		default:
6571 		    /* there is an error but it will be detected later */
6572 		    break;
6573 	    }
6574 	} else if (NXT(1) == '?') {
6575 	    xmlParsePI(ctxt);
6576 	}
6577     }
6578     /*
6579      * This is only for internal subset. On external entities,
6580      * the replacement is done before parsing stage
6581      */
6582     if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6583 	xmlParsePEReference(ctxt);
6584 
6585     /*
6586      * Conditional sections are allowed from entities included
6587      * by PE References in the internal subset.
6588      */
6589     if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6590         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6591 	    xmlParseConditionalSections(ctxt);
6592 	}
6593     }
6594 
6595     ctxt->instate = XML_PARSER_DTD;
6596 }
6597 
6598 /**
6599  * xmlParseTextDecl:
6600  * @ctxt:  an XML parser context
6601  *
6602  * parse an XML declaration header for external entities
6603  *
6604  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6605  */
6606 
6607 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6608 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6609     xmlChar *version;
6610     const xmlChar *encoding;
6611 
6612     /*
6613      * We know that '<?xml' is here.
6614      */
6615     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6616 	SKIP(5);
6617     } else {
6618 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6619 	return;
6620     }
6621 
6622     if (!IS_BLANK_CH(CUR)) {
6623 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6624 		       "Space needed after '<?xml'\n");
6625     }
6626     SKIP_BLANKS;
6627 
6628     /*
6629      * We may have the VersionInfo here.
6630      */
6631     version = xmlParseVersionInfo(ctxt);
6632     if (version == NULL)
6633 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
6634     else {
6635 	if (!IS_BLANK_CH(CUR)) {
6636 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6637 		           "Space needed here\n");
6638 	}
6639     }
6640     ctxt->input->version = version;
6641 
6642     /*
6643      * We must have the encoding declaration
6644      */
6645     encoding = xmlParseEncodingDecl(ctxt);
6646     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6647 	/*
6648 	 * The XML REC instructs us to stop parsing right here
6649 	 */
6650         return;
6651     }
6652     if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6653 	xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6654 		       "Missing encoding in text declaration\n");
6655     }
6656 
6657     SKIP_BLANKS;
6658     if ((RAW == '?') && (NXT(1) == '>')) {
6659         SKIP(2);
6660     } else if (RAW == '>') {
6661         /* Deprecated old WD ... */
6662 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6663 	NEXT;
6664     } else {
6665 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6666 	MOVETO_ENDTAG(CUR_PTR);
6667 	NEXT;
6668     }
6669 }
6670 
6671 /**
6672  * xmlParseExternalSubset:
6673  * @ctxt:  an XML parser context
6674  * @ExternalID: the external identifier
6675  * @SystemID: the system identifier (or URL)
6676  *
6677  * parse Markup declarations from an external subset
6678  *
6679  * [30] extSubset ::= textDecl? extSubsetDecl
6680  *
6681  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6682  */
6683 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6684 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6685                        const xmlChar *SystemID) {
6686     xmlDetectSAX2(ctxt);
6687     GROW;
6688 
6689     if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6690         (ctxt->input->end - ctxt->input->cur >= 4)) {
6691         xmlChar start[4];
6692 	xmlCharEncoding enc;
6693 
6694 	start[0] = RAW;
6695 	start[1] = NXT(1);
6696 	start[2] = NXT(2);
6697 	start[3] = NXT(3);
6698 	enc = xmlDetectCharEncoding(start, 4);
6699 	if (enc != XML_CHAR_ENCODING_NONE)
6700 	    xmlSwitchEncoding(ctxt, enc);
6701     }
6702 
6703     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6704 	xmlParseTextDecl(ctxt);
6705 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6706 	    /*
6707 	     * The XML REC instructs us to stop parsing right here
6708 	     */
6709 	    ctxt->instate = XML_PARSER_EOF;
6710 	    return;
6711 	}
6712     }
6713     if (ctxt->myDoc == NULL) {
6714         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6715 	if (ctxt->myDoc == NULL) {
6716 	    xmlErrMemory(ctxt, "New Doc failed");
6717 	    return;
6718 	}
6719 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
6720     }
6721     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6722         xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6723 
6724     ctxt->instate = XML_PARSER_DTD;
6725     ctxt->external = 1;
6726     while (((RAW == '<') && (NXT(1) == '?')) ||
6727            ((RAW == '<') && (NXT(1) == '!')) ||
6728 	   (RAW == '%') || IS_BLANK_CH(CUR)) {
6729 	const xmlChar *check = CUR_PTR;
6730 	unsigned int cons = ctxt->input->consumed;
6731 
6732 	GROW;
6733         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6734 	    xmlParseConditionalSections(ctxt);
6735 	} else if (IS_BLANK_CH(CUR)) {
6736 	    NEXT;
6737 	} else if (RAW == '%') {
6738             xmlParsePEReference(ctxt);
6739 	} else
6740 	    xmlParseMarkupDecl(ctxt);
6741 
6742 	/*
6743 	 * Pop-up of finished entities.
6744 	 */
6745 	while ((RAW == 0) && (ctxt->inputNr > 1))
6746 	    xmlPopInput(ctxt);
6747 
6748 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6749 	    xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6750 	    break;
6751 	}
6752     }
6753 
6754     if (RAW != 0) {
6755 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756     }
6757 
6758 }
6759 
6760 /**
6761  * xmlParseReference:
6762  * @ctxt:  an XML parser context
6763  *
6764  * Parse and handle a reference ('&...;') found in content. Depending on
6765  * the SAX interface and the parsing mode this ends up in a characters()
6766  * callback (CharRef or predefined entity), in a reference() callback, or
6767  * in the entity replacement nodes being attached under ctxt->node.
6768  *
6769  * [67] Reference ::= EntityRef | CharRef
6770  */
6771 void
xmlParseReference(xmlParserCtxtPtr ctxt)6772 xmlParseReference(xmlParserCtxtPtr ctxt) {
6773     xmlEntityPtr ent;
6774     xmlChar *val;
6775     int was_checked;	/* value of ent->checked before any first parse below */
6776     xmlNodePtr list = NULL;	/* nodes produced by the first parse, if any */
6777     xmlParserErrors ret = XML_ERR_OK;
6778 
6779 
6780     if (RAW != '&')
6781         return;
6782 
6783     /*
6784      * Simple case of a CharRef
6785      */
6786     if (NXT(1) == '#') {
6787 	int i = 0;
6788 	xmlChar out[10];
6789 	int hex = NXT(2);	/* sampled before xmlParseCharRef() runs */
6790 	int value = xmlParseCharRef(ctxt);
6791 
6792 	if (value == 0)	/* presumably the failure return - confirm */
6793 	    return;
6794 	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6795 	    /*
6796 	     * So we are using non-UTF-8 buffers
6797 	     * Check that the char fits in 8 bits, if not pass it to the
6798 	     * reference() callback spelled as a CharRef ("#x..." or "#...").
6799 	     */
6800 	    if (value <= 0xFF) {
6801 		out[0] = value;
6802 		out[1] = 0;
6803 		if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6804 		    (!ctxt->disableSAX))
6805 		    ctxt->sax->characters(ctxt->userData, out, 1);
6806 	    } else {
6807 		if ((hex == 'x') || (hex == 'X'))
6808 		    snprintf((char *)out, sizeof(out), "#x%X", value);
6809 		else
6810 		    snprintf((char *)out, sizeof(out), "#%d", value);
6811 		if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6812 		    (!ctxt->disableSAX))
6813 		    ctxt->sax->reference(ctxt->userData, out);
6814 	    }
6815 	} else {
6816 	    /*
6817 	     * Just encode the value in UTF-8
6818 	     */
6819 	    COPY_BUF(0 ,out, i, value);
6820 	    out[i] = 0;
6821 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6822 		(!ctxt->disableSAX))
6823 		ctxt->sax->characters(ctxt->userData, out, i);
6824 	}
6825 	return;
6826     }
6827 
6828     /*
6829      * We are seeing an entity reference
6830      */
6831     ent = xmlParseEntityRef(ctxt);
6832     if (ent == NULL) return;
6833     if (!ctxt->wellFormed)
6834 	return;
6835     was_checked = ent->checked;
6836 
6837     /* special case of predefined entities */
6838     if ((ent->name == NULL) ||
6839         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6840 	val = ent->content;
6841 	if (val == NULL) return;
6842 	/*
6843 	 * inline the entity.
6844 	 */
6845 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6846 	    (!ctxt->disableSAX))
6847 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6848 	return;
6849     }
6850 
6851     /*
6852      * The first reference to the entity triggers a parsing phase
6853      * where ent->children is filled with the result from
6854      * the parsing.
6855      */
6856     if (ent->checked == 0) {
6857 	unsigned long oldnbent = ctxt->nbentities;
6858 
6859 	/*
6860 	 * This is a bit hackish but this seems the best
6861 	 * way to make sure both SAX and DOM entity support
6862 	 * behaves okay.
6863 	 */
6864 	void *user_data;
6865 	if (ctxt->userData == ctxt)
6866 	    user_data = NULL;
6867 	else
6868 	    user_data = ctxt->userData;
6869 
6870 	/*
6871 	 * Check that this entity is well formed
6872 	 * 4.3.2: An internal general parsed entity is well-formed
6873 	 * if its replacement text matches the production labeled
6874 	 * content.
6875 	 */
6876 	if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6877 	    ctxt->depth++;
6878 	    ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6879 	                                              user_data, &list);
6880 	    ctxt->depth--;
6881 
6882 	} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6883 	    ctxt->depth++;
6884 	    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6885 	                                   user_data, ctxt->depth, ent->URI,
6886 					   ent->ExternalID, &list);
6887 	    ctxt->depth--;
6888 	} else {
6889 	    ret = XML_ERR_ENTITY_PE_INTERNAL;
6890 	    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6891 			 "invalid entity type found\n", NULL);
6892 	}
6893 
6894 	/*
6895 	 * Record how much ctxt->nbentities grew while this entity's
6896 	 * content was parsed, then run the checks
6897 	 */
6898 	ent->checked = ctxt->nbentities - oldnbent;
6899 	if (ret == XML_ERR_ENTITY_LOOP) {
6900 	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6901 	    xmlFreeNodeList(list);
6902 	    return;
6903 	}
6904 	if (xmlParserEntityCheck(ctxt, 0, ent)) {
6905 	    xmlFreeNodeList(list);
6906 	    return;
6907 	}
6908 
6909 	if ((ret == XML_ERR_OK) && (list != NULL)) {
6910 	    if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6911 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6912 		(ent->children == NULL)) {
6913 		ent->children = list;
6914 		if (ctxt->replaceEntities) {
6915 		    /*
6916 		     * Prune it directly in the generated document
6917 		     * except for single text nodes.
6918 		     */
6919 		    if (((list->type == XML_TEXT_NODE) &&
6920 			 (list->next == NULL)) ||
6921 			(ctxt->parseMode == XML_PARSE_READER)) {
6922 			list->parent = (xmlNodePtr) ent;
6923 			list = NULL;
6924 			ent->owner = 1;
6925 		    } else {
6926 			ent->owner = 0;
6927 			while (list != NULL) {
6928 			    list->parent = (xmlNodePtr) ctxt->node;
6929 			    list->doc = ctxt->myDoc;
6930 			    if (list->next == NULL)
6931 				ent->last = list;
6932 			    list = list->next;
6933 			}
6934 			list = ent->children;
6935 #ifdef LIBXML_LEGACY_ENABLED
6936 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6937 			  xmlAddEntityReference(ent, list, NULL);
6938 #endif /* LIBXML_LEGACY_ENABLED */
6939 		    }
6940 		} else {
6941 		    ent->owner = 1;
6942 		    while (list != NULL) {
6943 			list->parent = (xmlNodePtr) ent;
6944 			if (list->next == NULL)
6945 			    ent->last = list;
6946 			list = list->next;
6947 		    }
6948 		}
6949 	    } else {
6950 		xmlFreeNodeList(list);
6951 		list = NULL;
6952 	    }
6953 	} else if ((ret != XML_ERR_OK) &&
6954 		   (ret != XML_WAR_UNDECLARED_ENTITY)) {
6955 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6956 		     "Entity '%s' failed to parse\n", ent->name);
6957 	} else if (list != NULL) {
6958 	    xmlFreeNodeList(list);
6959 	    list = NULL;
6960 	}
6961 	if (ent->checked == 0)
6962 	    ent->checked = 1;	/* mark as checked even if the count was 0 */
6963     } else if (ent->checked != 1) {
6964 	ctxt->nbentities += ent->checked; /* re-add the count stored earlier */
6965     }
6966 
6967     /*
6968      * Now that the entity content has been gathered
6969      * provide it to the application, this can take different forms based
6970      * on the parsing modes.
6971      */
6972     if (ent->children == NULL) {
6973 	/*
6974 	 * Probably running in SAX mode and the callbacks don't
6975 	 * build the entity content. So unless we already went
6976 	 * through parsing for first checking go through the entity
6977 	 * content to generate callbacks associated to the entity
6978 	 */
6979 	if (was_checked != 0) {
6980 	    void *user_data;
6981 	    /*
6982 	     * This is a bit hackish but this seems the best
6983 	     * way to make sure both SAX and DOM entity support
6984 	     * behaves okay.
6985 	     */
6986 	    if (ctxt->userData == ctxt)
6987 		user_data = NULL;
6988 	    else
6989 		user_data = ctxt->userData;
6990 
6991 	    if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6992 		ctxt->depth++;
6993 		ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6994 				   ent->content, user_data, NULL);
6995 		ctxt->depth--;
6996 	    } else if (ent->etype ==
6997 		       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6998 		ctxt->depth++;
6999 		ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7000 			   ctxt->sax, user_data, ctxt->depth,
7001 			   ent->URI, ent->ExternalID, NULL);
7002 		ctxt->depth--;
7003 	    } else {
7004 		ret = XML_ERR_ENTITY_PE_INTERNAL;
7005 		xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7006 			     "invalid entity type found\n", NULL);
7007 	    }
7008 	    if (ret == XML_ERR_ENTITY_LOOP) {
7009 		xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7010 		return;
7011 	    }
7012 	}
7013 	if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7014 	    (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7015 	    /*
7016 	     * Entity reference callback comes second, it's somewhat
7017 	     * superfluous but a compatibility to historical behaviour
7018 	     */
7019 	    ctxt->sax->reference(ctxt->userData, ent->name);
7020 	}
7021 	return;
7022     }
7023 
7024     /*
7025      * The entity has children: without substitution just emit reference()
7026      */
7027     if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7028 	(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7029 	/*
7030 	 * Create a node.
7031 	 */
7032 	ctxt->sax->reference(ctxt->userData, ent->name);
7033 	return;
7034     }
7035 
7036     if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7037 	/*
7038 	 * There is a problem on the handling of _private for entities
7039 	 * (bug 155816): Should we copy the content of the field from
7040 	 * the entity (possibly overwriting some value set by the user
7041 	 * when a copy is created), should we leave it alone, or should
7042 	 * we try to take care of different situations?  The problem
7043 	 * is exacerbated by the usage of this field by the xmlReader.
7044 	 * To fix this bug, we look at _private on the created node
7045 	 * and, if it's NULL, we copy in whatever was in the entity.
7046 	 * If it's not NULL we leave it alone.  This is somewhat of a
7047 	 * hack - maybe we should have further tests to determine
7048 	 * what to do.
7049 	 */
7050 	if ((ctxt->node != NULL) && (ent->children != NULL)) {
7051 	    /*
7052 	     * Seems we are generating the DOM content, do
7053 	     * a simple tree copy for all references except the first
7054 	     * In the first occurrence list contains the replacement.
7055 	     * parseMode == XML_PARSE_READER means we are operating on the
7056 	     * Reader and since nodes are discarded we must copy all the time.
7057 	     */
7058 	    if (((list == NULL) && (ent->owner == 0)) ||
7059 		(ctxt->parseMode == XML_PARSE_READER)) {
7060 		xmlNodePtr nw = NULL, cur, firstChild = NULL;
7061 
7062 		/*
7063 		 * when operating on a reader, the entities definitions
7064 		 * are always owning the entities subtree. (disabled code:)
7065 		if (ctxt->parseMode == XML_PARSE_READER)
7066 		    ent->owner = 1;
7067 		 */
7068 
7069 		cur = ent->children;
7070 		while (cur != NULL) {
7071 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7072 		    if (nw != NULL) {
7073 			if (nw->_private == NULL)
7074 			    nw->_private = cur->_private;
7075 			if (firstChild == NULL){
7076 			    firstChild = nw;
7077 			}
7078 			nw = xmlAddChild(ctxt->node, nw);
7079 		    }
7080 		    if (cur == ent->last) {
7081 			/*
7082 			 * needed to detect some strange empty
7083 			 * node cases in the reader tests
7084 			 */
7085 			if ((ctxt->parseMode == XML_PARSE_READER) &&
7086 			    (nw != NULL) &&
7087 			    (nw->type == XML_ELEMENT_NODE) &&
7088 			    (nw->children == NULL))
7089 			    nw->extra = 1;
7090 
7091 			break;
7092 		    }
7093 		    cur = cur->next;
7094 		}
7095 #ifdef LIBXML_LEGACY_ENABLED
7096 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7097 		  xmlAddEntityReference(ent, firstChild, nw);
7098 #endif /* LIBXML_LEGACY_ENABLED */
7099 	    } else if (list == NULL) {
7100 		xmlNodePtr nw = NULL, cur, next, last,
7101 			   firstChild = NULL;
7102 		/*
7103 		 * Copy the entity child list and make it the new
7104 		 * entity child list. The goal is to make sure any
7105 		 * ID or REF referenced will be the one from the
7106 		 * document content and not the entity copy.
7107 		 */
7108 		cur = ent->children;
7109 		ent->children = NULL;
7110 		last = ent->last;
7111 		ent->last = NULL;
7112 		while (cur != NULL) {
7113 		    next = cur->next;
7114 		    cur->next = NULL;
7115 		    cur->parent = NULL;
7116 		    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7117 		    if (nw != NULL) {
7118 			if (nw->_private == NULL)
7119 			    nw->_private = cur->_private;
7120 			if (firstChild == NULL){
7121 			    firstChild = cur;
7122 			}
7123 			xmlAddChild((xmlNodePtr) ent, nw);
7124 			xmlAddChild(ctxt->node, cur);
7125 		    }
7126 		    if (cur == last)
7127 			break;
7128 		    cur = next;
7129 		}
7130 		if (ent->owner == 0)
7131 		    ent->owner = 1;
7132 #ifdef LIBXML_LEGACY_ENABLED
7133 		if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7134 		  xmlAddEntityReference(ent, firstChild, nw);
7135 #endif /* LIBXML_LEGACY_ENABLED */
7136 	    } else {
7137 		const xmlChar *nbktext;
7138 
7139 		/*
7140 		 * the name change is to avoid coalescing of the
7141 		 * node with a possible previous text one which
7142 		 * would make ent->children a dangling pointer
7143 		 */
7144 		nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7145 					-1);
7146 		if (ent->children->type == XML_TEXT_NODE)
7147 		    ent->children->name = nbktext;
7148 		if ((ent->last != ent->children) &&
7149 		    (ent->last->type == XML_TEXT_NODE))
7150 		    ent->last->name = nbktext;
7151 		xmlAddChildList(ctxt->node, ent->children);
7152 	    }
7153 
7154 	    /*
7155 	     * This is to avoid a nasty side effect, see
7156 	     * characters() in SAX.c
7157 	     */
7158 	    ctxt->nodemem = 0;
7159 	    ctxt->nodelen = 0;
7160 	    return;
7161 	}
7162     }
7163 }
7164 
7165 /**
7166  * xmlParseEntityRef:
7167  * @ctxt:  an XML parser context
7168  *
7169  * parse ENTITY references declarations
7170  *
7171  * [68] EntityRef ::= '&' Name ';'
7172  *
7173  * [ WFC: Entity Declared ]
7174  * In a document without any DTD, a document with only an internal DTD
7175  * subset which contains no parameter entity references, or a document
7176  * with "standalone='yes'", the Name given in the entity reference
7177  * must match that in an entity declaration, except that well-formed
7178  * documents need not declare any of the following entities: amp, lt,
7179  * gt, apos, quot.  The declaration of a parameter entity must precede
7180  * any reference to it.  Similarly, the declaration of a general entity
7181  * must precede any reference to it which appears in a default value in an
7182  * attribute-list declaration. Note that if entities are declared in the
7183  * external subset or in external parameter entities, a non-validating
7184  * processor is not obligated to read and process their declarations;
7185  * for such documents, the rule that an entity must be declared is a
7186  * well-formedness constraint only if standalone='yes'.
7187  *
7188  * [ WFC: Parsed Entity ]
7189  * An entity reference must not contain the name of an unparsed entity
7190  *
7191  * Returns the xmlEntityPtr if found, or NULL otherwise.
7192  */
7193 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7194 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7195     const xmlChar *name;
7196     xmlEntityPtr ent = NULL;
7197 
7198     GROW;
7199     if (ctxt->instate == XML_PARSER_EOF)
7200         return(NULL);
7201 
7202     if (RAW != '&')
7203         return(NULL);
7204     NEXT;
7205     name = xmlParseName(ctxt);
7206     if (name == NULL) {
7207 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7208 		       "xmlParseEntityRef: no name\n");
7209         return(NULL);
7210     }
7211     if (RAW != ';') {
7212 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7213 	return(NULL);
7214     }
7215     NEXT;
7216 
7217     /*
7218      * Predefined entites override any extra definition
7219      */
7220     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7221         ent = xmlGetPredefinedEntity(name);
7222         if (ent != NULL)
7223             return(ent);
7224     }
7225 
7226     /*
7227      * Increate the number of entity references parsed
7228      */
7229     ctxt->nbentities++;
7230 
7231     /*
7232      * Ask first SAX for entity resolution, otherwise try the
7233      * entities which may have stored in the parser context.
7234      */
7235     if (ctxt->sax != NULL) {
7236 	if (ctxt->sax->getEntity != NULL)
7237 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7238 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7239 	    (ctxt->options & XML_PARSE_OLDSAX))
7240 	    ent = xmlGetPredefinedEntity(name);
7241 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7242 	    (ctxt->userData==ctxt)) {
7243 	    ent = xmlSAX2GetEntity(ctxt, name);
7244 	}
7245     }
7246     if (ctxt->instate == XML_PARSER_EOF)
7247 	return(NULL);
7248     /*
7249      * [ WFC: Entity Declared ]
7250      * In a document without any DTD, a document with only an
7251      * internal DTD subset which contains no parameter entity
7252      * references, or a document with "standalone='yes'", the
7253      * Name given in the entity reference must match that in an
7254      * entity declaration, except that well-formed documents
7255      * need not declare any of the following entities: amp, lt,
7256      * gt, apos, quot.
7257      * The declaration of a parameter entity must precede any
7258      * reference to it.
7259      * Similarly, the declaration of a general entity must
7260      * precede any reference to it which appears in a default
7261      * value in an attribute-list declaration. Note that if
7262      * entities are declared in the external subset or in
7263      * external parameter entities, a non-validating processor
7264      * is not obligated to read and process their declarations;
7265      * for such documents, the rule that an entity must be
7266      * declared is a well-formedness constraint only if
7267      * standalone='yes'.
7268      */
7269     if (ent == NULL) {
7270 	if ((ctxt->standalone == 1) ||
7271 	    ((ctxt->hasExternalSubset == 0) &&
7272 	     (ctxt->hasPErefs == 0))) {
7273 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 		     "Entity '%s' not defined\n", name);
7275 	} else {
7276 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7277 		     "Entity '%s' not defined\n", name);
7278 	    if ((ctxt->inSubset == 0) &&
7279 		(ctxt->sax != NULL) &&
7280 		(ctxt->sax->reference != NULL)) {
7281 		ctxt->sax->reference(ctxt->userData, name);
7282 	    }
7283 	}
7284 	ctxt->valid = 0;
7285     }
7286 
7287     /*
7288      * [ WFC: Parsed Entity ]
7289      * An entity reference must not contain the name of an
7290      * unparsed entity
7291      */
7292     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7293 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7294 		 "Entity reference to unparsed entity %s\n", name);
7295     }
7296 
7297     /*
7298      * [ WFC: No External Entity References ]
7299      * Attribute values cannot contain direct or indirect
7300      * entity references to external entities.
7301      */
7302     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7303 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7304 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7305 	     "Attribute references external entity '%s'\n", name);
7306     }
7307     /*
7308      * [ WFC: No < in Attribute Values ]
7309      * The replacement text of any entity referred to directly or
7310      * indirectly in an attribute value (other than "&lt;") must
7311      * not contain a <.
7312      */
7313     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7314 	     (ent != NULL) && (ent->content != NULL) &&
7315 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7316 	     (xmlStrchr(ent->content, '<'))) {
7317 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7318     "'<' in entity '%s' is not allowed in attributes values\n", name);
7319     }
7320 
7321     /*
7322      * Internal check, no parameter entities here ...
7323      */
7324     else {
7325 	switch (ent->etype) {
7326 	    case XML_INTERNAL_PARAMETER_ENTITY:
7327 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7328 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7329 	     "Attempt to reference the parameter entity '%s'\n",
7330 			      name);
7331 	    break;
7332 	    default:
7333 	    break;
7334 	}
7335     }
7336 
7337     /*
7338      * [ WFC: No Recursion ]
7339      * A parsed entity must not contain a recursive reference
7340      * to itself, either directly or indirectly.
7341      * Done somewhere else
7342      */
7343     return(ent);
7344 }
7345 
7346 /**
7347  * xmlParseStringEntityRef:
7348  * @ctxt:  an XML parser context
7349  * @str:  a pointer to an index in the string
7350  *
7351  * parse ENTITY references declarations, but this version parses it from
7352  * a string value.
7353  *
7354  * [68] EntityRef ::= '&' Name ';'
7355  *
7356  * [ WFC: Entity Declared ]
7357  * In a document without any DTD, a document with only an internal DTD
7358  * subset which contains no parameter entity references, or a document
7359  * with "standalone='yes'", the Name given in the entity reference
7360  * must match that in an entity declaration, except that well-formed
7361  * documents need not declare any of the following entities: amp, lt,
7362  * gt, apos, quot.  The declaration of a parameter entity must precede
7363  * any reference to it.  Similarly, the declaration of a general entity
7364  * must precede any reference to it which appears in a default value in an
7365  * attribute-list declaration. Note that if entities are declared in the
7366  * external subset or in external parameter entities, a non-validating
7367  * processor is not obligated to read and process their declarations;
7368  * for such documents, the rule that an entity must be declared is a
7369  * well-formedness constraint only if standalone='yes'.
7370  *
7371  * [ WFC: Parsed Entity ]
7372  * An entity reference must not contain the name of an unparsed entity
7373  *
7374  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7375  * is updated to the current location in the string.
7376  */
7377 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7378 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7379     xmlChar *name;
7380     const xmlChar *ptr;
7381     xmlChar cur;
7382     xmlEntityPtr ent = NULL;
7383 
7384     if ((str == NULL) || (*str == NULL))
7385         return(NULL);
7386     ptr = *str;
7387     cur = *ptr;
7388     if (cur != '&')
7389 	return(NULL);
7390 
7391     ptr++;
7392     name = xmlParseStringName(ctxt, &ptr);
7393     if (name == NULL) {
7394 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7395 		       "xmlParseStringEntityRef: no name\n");
7396 	*str = ptr;
7397 	return(NULL);
7398     }
7399     if (*ptr != ';') {
7400 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7401         xmlFree(name);
7402 	*str = ptr;
7403 	return(NULL);
7404     }
7405     ptr++;
7406 
7407 
7408     /*
7409      * Predefined entites override any extra definition
7410      */
7411     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7412         ent = xmlGetPredefinedEntity(name);
7413         if (ent != NULL) {
7414             xmlFree(name);
7415             *str = ptr;
7416             return(ent);
7417         }
7418     }
7419 
7420     /*
7421      * Increate the number of entity references parsed
7422      */
7423     ctxt->nbentities++;
7424 
7425     /*
7426      * Ask first SAX for entity resolution, otherwise try the
7427      * entities which may have stored in the parser context.
7428      */
7429     if (ctxt->sax != NULL) {
7430 	if (ctxt->sax->getEntity != NULL)
7431 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7432 	if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7433 	    ent = xmlGetPredefinedEntity(name);
7434 	if ((ent == NULL) && (ctxt->userData==ctxt)) {
7435 	    ent = xmlSAX2GetEntity(ctxt, name);
7436 	}
7437     }
7438     if (ctxt->instate == XML_PARSER_EOF) {
7439 	xmlFree(name);
7440 	return(NULL);
7441     }
7442 
7443     /*
7444      * [ WFC: Entity Declared ]
7445      * In a document without any DTD, a document with only an
7446      * internal DTD subset which contains no parameter entity
7447      * references, or a document with "standalone='yes'", the
7448      * Name given in the entity reference must match that in an
7449      * entity declaration, except that well-formed documents
7450      * need not declare any of the following entities: amp, lt,
7451      * gt, apos, quot.
7452      * The declaration of a parameter entity must precede any
7453      * reference to it.
7454      * Similarly, the declaration of a general entity must
7455      * precede any reference to it which appears in a default
7456      * value in an attribute-list declaration. Note that if
7457      * entities are declared in the external subset or in
7458      * external parameter entities, a non-validating processor
7459      * is not obligated to read and process their declarations;
7460      * for such documents, the rule that an entity must be
7461      * declared is a well-formedness constraint only if
7462      * standalone='yes'.
7463      */
7464     if (ent == NULL) {
7465 	if ((ctxt->standalone == 1) ||
7466 	    ((ctxt->hasExternalSubset == 0) &&
7467 	     (ctxt->hasPErefs == 0))) {
7468 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7469 		     "Entity '%s' not defined\n", name);
7470 	} else {
7471 	    xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7472 			  "Entity '%s' not defined\n",
7473 			  name);
7474 	}
7475 	/* TODO ? check regressions ctxt->valid = 0; */
7476     }
7477 
7478     /*
7479      * [ WFC: Parsed Entity ]
7480      * An entity reference must not contain the name of an
7481      * unparsed entity
7482      */
7483     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7484 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7485 		 "Entity reference to unparsed entity %s\n", name);
7486     }
7487 
7488     /*
7489      * [ WFC: No External Entity References ]
7490      * Attribute values cannot contain direct or indirect
7491      * entity references to external entities.
7492      */
7493     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7494 	     (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7495 	xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7496 	 "Attribute references external entity '%s'\n", name);
7497     }
7498     /*
7499      * [ WFC: No < in Attribute Values ]
7500      * The replacement text of any entity referred to directly or
7501      * indirectly in an attribute value (other than "&lt;") must
7502      * not contain a <.
7503      */
7504     else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7505 	     (ent != NULL) && (ent->content != NULL) &&
7506 	     (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7507 	     (xmlStrchr(ent->content, '<'))) {
7508 	xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7509      "'<' in entity '%s' is not allowed in attributes values\n",
7510 			  name);
7511     }
7512 
7513     /*
7514      * Internal check, no parameter entities here ...
7515      */
7516     else {
7517 	switch (ent->etype) {
7518 	    case XML_INTERNAL_PARAMETER_ENTITY:
7519 	    case XML_EXTERNAL_PARAMETER_ENTITY:
7520 		xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7521 	     "Attempt to reference the parameter entity '%s'\n",
7522 				  name);
7523 	    break;
7524 	    default:
7525 	    break;
7526 	}
7527     }
7528 
7529     /*
7530      * [ WFC: No Recursion ]
7531      * A parsed entity must not contain a recursive reference
7532      * to itself, either directly or indirectly.
7533      * Done somewhere else
7534      */
7535 
7536     xmlFree(name);
7537     *str = ptr;
7538     return(ent);
7539 }
7540 
7541 /**
7542  * xmlParsePEReference:
7543  * @ctxt:  an XML parser context
7544  *
7545  * parse PEReference declarations
7546  * The entity content is handled directly by pushing its content as
7547  * a new input stream.
7548  *
7549  * [69] PEReference ::= '%' Name ';'
7550  *
7551  * [ WFC: No Recursion ]
7552  * A parsed entity must not contain a recursive
7553  * reference to itself, either directly or indirectly.
7554  *
7555  * [ WFC: Entity Declared ]
7556  * In a document without any DTD, a document with only an internal DTD
7557  * subset which contains no parameter entity references, or a document
7558  * with "standalone='yes'", ...  ... The declaration of a parameter
7559  * entity must precede any reference to it...
7560  *
7561  * [ VC: Entity Declared ]
7562  * In a document with an external subset or external parameter entities
7563  * with "standalone='no'", ...  ... The declaration of a parameter entity
7564  * must precede any reference to it...
7565  *
7566  * [ WFC: In DTD ]
7567  * Parameter-entity references may only appear in the DTD.
7568  * NOTE: misleading but this is handled.
7569  */
7570 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7571 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7572 {
7573     const xmlChar *name;
7574     xmlEntityPtr entity = NULL;
7575     xmlParserInputPtr input;
7576 
7577     if (RAW != '%')
7578         return;
7579     NEXT;
7580     name = xmlParseName(ctxt);
7581     if (name == NULL) {
7582 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7583 		       "xmlParsePEReference: no name\n");
7584 	return;
7585     }
7586     if (RAW != ';') {
7587 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7588         return;
7589     }
7590 
7591     NEXT;
7592 
7593     /*
7594      * Increate the number of entity references parsed
7595      */
7596     ctxt->nbentities++;
7597 
7598     /*
7599      * Request the entity from SAX
7600      */
7601     if ((ctxt->sax != NULL) &&
7602 	(ctxt->sax->getParameterEntity != NULL))
7603 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7604     if (ctxt->instate == XML_PARSER_EOF)
7605 	return;
7606     if (entity == NULL) {
7607 	/*
7608 	 * [ WFC: Entity Declared ]
7609 	 * In a document without any DTD, a document with only an
7610 	 * internal DTD subset which contains no parameter entity
7611 	 * references, or a document with "standalone='yes'", ...
7612 	 * ... The declaration of a parameter entity must precede
7613 	 * any reference to it...
7614 	 */
7615 	if ((ctxt->standalone == 1) ||
7616 	    ((ctxt->hasExternalSubset == 0) &&
7617 	     (ctxt->hasPErefs == 0))) {
7618 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619 			      "PEReference: %%%s; not found\n",
7620 			      name);
7621 	} else {
7622 	    /*
7623 	     * [ VC: Entity Declared ]
7624 	     * In a document with an external subset or external
7625 	     * parameter entities with "standalone='no'", ...
7626 	     * ... The declaration of a parameter entity must
7627 	     * precede any reference to it...
7628 	     */
7629 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7630 			  "PEReference: %%%s; not found\n",
7631 			  name, NULL);
7632 	    ctxt->valid = 0;
7633 	}
7634     } else {
7635 	/*
7636 	 * Internal checking in case the entity quest barfed
7637 	 */
7638 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7639 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7640 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7641 		  "Internal: %%%s; is not a parameter entity\n",
7642 			  name, NULL);
7643 	} else if (ctxt->input->free != deallocblankswrapper) {
7644 	    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7645 	    if (xmlPushInput(ctxt, input) < 0)
7646 		return;
7647 	} else {
7648 	    /*
7649 	     * TODO !!!
7650 	     * handle the extra spaces added before and after
7651 	     * c.f. http://www.w3.org/TR/REC-xml#as-PE
7652 	     */
7653 	    input = xmlNewEntityInputStream(ctxt, entity);
7654 	    if (xmlPushInput(ctxt, input) < 0)
7655 		return;
7656 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7657 		(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7658 		(IS_BLANK_CH(NXT(5)))) {
7659 		xmlParseTextDecl(ctxt);
7660 		if (ctxt->errNo ==
7661 		    XML_ERR_UNSUPPORTED_ENCODING) {
7662 		    /*
7663 		     * The XML REC instructs us to stop parsing
7664 		     * right here
7665 		     */
7666 		    ctxt->instate = XML_PARSER_EOF;
7667 		    return;
7668 		}
7669 	    }
7670 	}
7671     }
7672     ctxt->hasPErefs = 1;
7673 }
7674 
7675 /**
7676  * xmlLoadEntityContent:
7677  * @ctxt:  an XML parser context
7678  * @entity: an unloaded system entity
7679  *
7680  * Load the original content of the given system entity from the
7681  * ExternalID/SystemID given. This is to be used for Included in Literal
7682  * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7683  *
7684  * Returns 0 in case of success and -1 in case of failure
7685  */
7686 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7687 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7688     xmlParserInputPtr input;
7689     xmlBufferPtr buf;
7690     int l, c;
7691     int count = 0;
7692 
7693     if ((ctxt == NULL) || (entity == NULL) ||
7694         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7695 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7696 	(entity->content != NULL)) {
7697 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7698 	            "xmlLoadEntityContent parameter error");
7699         return(-1);
7700     }
7701 
7702     if (xmlParserDebugEntities)
7703 	xmlGenericError(xmlGenericErrorContext,
7704 		"Reading %s entity content input\n", entity->name);
7705 
7706     buf = xmlBufferCreate();
7707     if (buf == NULL) {
7708 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7709 	            "xmlLoadEntityContent parameter error");
7710         return(-1);
7711     }
7712 
7713     input = xmlNewEntityInputStream(ctxt, entity);
7714     if (input == NULL) {
7715 	xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7716 	            "xmlLoadEntityContent input error");
7717 	xmlBufferFree(buf);
7718         return(-1);
7719     }
7720 
7721     /*
7722      * Push the entity as the current input, read char by char
7723      * saving to the buffer until the end of the entity or an error
7724      */
7725     if (xmlPushInput(ctxt, input) < 0) {
7726         xmlBufferFree(buf);
7727 	return(-1);
7728     }
7729 
7730     GROW;
7731     c = CUR_CHAR(l);
7732     while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7733            (IS_CHAR(c))) {
7734         xmlBufferAdd(buf, ctxt->input->cur, l);
7735 	if (count++ > 100) {
7736 	    count = 0;
7737 	    GROW;
7738             if (ctxt->instate == XML_PARSER_EOF) {
7739                 xmlBufferFree(buf);
7740                 return(-1);
7741             }
7742 	}
7743 	NEXTL(l);
7744 	c = CUR_CHAR(l);
7745     }
7746 
7747     if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7748         xmlPopInput(ctxt);
7749     } else if (!IS_CHAR(c)) {
7750         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7751                           "xmlLoadEntityContent: invalid char value %d\n",
7752 	                  c);
7753 	xmlBufferFree(buf);
7754 	return(-1);
7755     }
7756     entity->content = buf->content;
7757     buf->content = NULL;
7758     xmlBufferFree(buf);
7759 
7760     return(0);
7761 }
7762 
7763 /**
7764  * xmlParseStringPEReference:
7765  * @ctxt:  an XML parser context
7766  * @str:  a pointer to an index in the string
7767  *
7768  * parse PEReference declarations
7769  *
7770  * [69] PEReference ::= '%' Name ';'
7771  *
7772  * [ WFC: No Recursion ]
7773  * A parsed entity must not contain a recursive
7774  * reference to itself, either directly or indirectly.
7775  *
7776  * [ WFC: Entity Declared ]
7777  * In a document without any DTD, a document with only an internal DTD
7778  * subset which contains no parameter entity references, or a document
7779  * with "standalone='yes'", ...  ... The declaration of a parameter
7780  * entity must precede any reference to it...
7781  *
7782  * [ VC: Entity Declared ]
7783  * In a document with an external subset or external parameter entities
7784  * with "standalone='no'", ...  ... The declaration of a parameter entity
7785  * must precede any reference to it...
7786  *
7787  * [ WFC: In DTD ]
7788  * Parameter-entity references may only appear in the DTD.
7789  * NOTE: misleading but this is handled.
7790  *
7791  * Returns the string of the entity content.
7792  *         str is updated to the current value of the index
7793  */
7794 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7795 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7796     const xmlChar *ptr;
7797     xmlChar cur;
7798     xmlChar *name;
7799     xmlEntityPtr entity = NULL;
7800 
7801     if ((str == NULL) || (*str == NULL)) return(NULL);
7802     ptr = *str;
7803     cur = *ptr;
7804     if (cur != '%')
7805         return(NULL);
7806     ptr++;
7807     name = xmlParseStringName(ctxt, &ptr);
7808     if (name == NULL) {
7809 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7810 		       "xmlParseStringPEReference: no name\n");
7811 	*str = ptr;
7812 	return(NULL);
7813     }
7814     cur = *ptr;
7815     if (cur != ';') {
7816 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7817 	xmlFree(name);
7818 	*str = ptr;
7819 	return(NULL);
7820     }
7821     ptr++;
7822 
7823     /*
7824      * Increate the number of entity references parsed
7825      */
7826     ctxt->nbentities++;
7827 
7828     /*
7829      * Request the entity from SAX
7830      */
7831     if ((ctxt->sax != NULL) &&
7832 	(ctxt->sax->getParameterEntity != NULL))
7833 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7834     if (ctxt->instate == XML_PARSER_EOF) {
7835 	xmlFree(name);
7836 	return(NULL);
7837     }
7838     if (entity == NULL) {
7839 	/*
7840 	 * [ WFC: Entity Declared ]
7841 	 * In a document without any DTD, a document with only an
7842 	 * internal DTD subset which contains no parameter entity
7843 	 * references, or a document with "standalone='yes'", ...
7844 	 * ... The declaration of a parameter entity must precede
7845 	 * any reference to it...
7846 	 */
7847 	if ((ctxt->standalone == 1) ||
7848 	    ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7849 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7850 		 "PEReference: %%%s; not found\n", name);
7851 	} else {
7852 	    /*
7853 	     * [ VC: Entity Declared ]
7854 	     * In a document with an external subset or external
7855 	     * parameter entities with "standalone='no'", ...
7856 	     * ... The declaration of a parameter entity must
7857 	     * precede any reference to it...
7858 	     */
7859 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7860 			  "PEReference: %%%s; not found\n",
7861 			  name, NULL);
7862 	    ctxt->valid = 0;
7863 	}
7864     } else {
7865 	/*
7866 	 * Internal checking in case the entity quest barfed
7867 	 */
7868 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7869 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7870 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7871 			  "%%%s; is not a parameter entity\n",
7872 			  name, NULL);
7873 	}
7874     }
7875     ctxt->hasPErefs = 1;
7876     xmlFree(name);
7877     *str = ptr;
7878     return(entity);
7879 }
7880 
7881 /**
7882  * xmlParseDocTypeDecl:
7883  * @ctxt:  an XML parser context
7884  *
7885  * parse a DOCTYPE declaration
7886  *
7887  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7888  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7889  *
7890  * [ VC: Root Element Type ]
7891  * The Name in the document type declaration must match the element
7892  * type of the root element.
7893  */
7894 
7895 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7896 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7897     const xmlChar *name = NULL;
7898     xmlChar *ExternalID = NULL;
7899     xmlChar *URI = NULL;
7900 
7901     /*
7902      * We know that '<!DOCTYPE' has been detected.
7903      */
7904     SKIP(9);
7905 
7906     SKIP_BLANKS;
7907 
7908     /*
7909      * Parse the DOCTYPE name.
7910      */
7911     name = xmlParseName(ctxt);
7912     if (name == NULL) {
7913 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7914 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7915     }
7916     ctxt->intSubName = name;
7917 
7918     SKIP_BLANKS;
7919 
7920     /*
7921      * Check for SystemID and ExternalID
7922      */
7923     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7924 
7925     if ((URI != NULL) || (ExternalID != NULL)) {
7926         ctxt->hasExternalSubset = 1;
7927     }
7928     ctxt->extSubURI = URI;
7929     ctxt->extSubSystem = ExternalID;
7930 
7931     SKIP_BLANKS;
7932 
7933     /*
7934      * Create and update the internal subset.
7935      */
7936     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7937 	(!ctxt->disableSAX))
7938 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7939     if (ctxt->instate == XML_PARSER_EOF)
7940 	return;
7941 
7942     /*
7943      * Is there any internal subset declarations ?
7944      * they are handled separately in xmlParseInternalSubset()
7945      */
7946     if (RAW == '[')
7947 	return;
7948 
7949     /*
7950      * We should be at the end of the DOCTYPE declaration.
7951      */
7952     if (RAW != '>') {
7953 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7954     }
7955     NEXT;
7956 }
7957 
7958 /**
7959  * xmlParseInternalSubset:
7960  * @ctxt:  an XML parser context
7961  *
7962  * parse the internal subset declaration
7963  *
7964  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7965  */
7966 
7967 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)7968 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7969     /*
7970      * Is there any DTD definition ?
7971      */
7972     if (RAW == '[') {
7973         ctxt->instate = XML_PARSER_DTD;
7974         NEXT;
7975 	/*
7976 	 * Parse the succession of Markup declarations and
7977 	 * PEReferences.
7978 	 * Subsequence (markupdecl | PEReference | S)*
7979 	 */
7980 	while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
7981 	    const xmlChar *check = CUR_PTR;
7982 	    unsigned int cons = ctxt->input->consumed;
7983 
7984 	    SKIP_BLANKS;
7985 	    xmlParseMarkupDecl(ctxt);
7986 	    xmlParsePEReference(ctxt);
7987 
7988 	    /*
7989 	     * Pop-up of finished entities.
7990 	     */
7991 	    while ((RAW == 0) && (ctxt->inputNr > 1))
7992 		xmlPopInput(ctxt);
7993 
7994 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7995 		xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 	     "xmlParseInternalSubset: error detected in Markup declaration\n");
7997 		break;
7998 	    }
7999 	}
8000 	if (RAW == ']') {
8001 	    NEXT;
8002 	    SKIP_BLANKS;
8003 	}
8004     }
8005 
8006     /*
8007      * We should be at the end of the DOCTYPE declaration.
8008      */
8009     if (RAW != '>') {
8010 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8011     }
8012     NEXT;
8013 }
8014 
8015 #ifdef LIBXML_SAX1_ENABLED
8016 /**
8017  * xmlParseAttribute:
8018  * @ctxt:  an XML parser context
8019  * @value:  a xmlChar ** used to store the value of the attribute
8020  *
8021  * parse an attribute
8022  *
8023  * [41] Attribute ::= Name Eq AttValue
8024  *
8025  * [ WFC: No External Entity References ]
8026  * Attribute values cannot contain direct or indirect entity references
8027  * to external entities.
8028  *
8029  * [ WFC: No < in Attribute Values ]
8030  * The replacement text of any entity referred to directly or indirectly in
8031  * an attribute value (other than "&lt;") must not contain a <.
8032  *
8033  * [ VC: Attribute Value Type ]
8034  * The attribute must have been declared; the value must be of the type
8035  * declared for it.
8036  *
8037  * [25] Eq ::= S? '=' S?
8038  *
8039  * With namespace:
8040  *
8041  * [NS 11] Attribute ::= QName Eq AttValue
8042  *
8043  * Also the case QName == xmlns:??? is handled independently as a namespace
8044  * definition.
8045  *
8046  * Returns the attribute name, and the value in *value.
8047  */
8048 
8049 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8050 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8051     const xmlChar *name;
8052     xmlChar *val;
8053 
8054     *value = NULL;
8055     GROW;
8056     name = xmlParseName(ctxt);
8057     if (name == NULL) {
8058 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8059 	               "error parsing attribute name\n");
8060         return(NULL);
8061     }
8062 
8063     /*
8064      * read the value
8065      */
8066     SKIP_BLANKS;
8067     if (RAW == '=') {
8068         NEXT;
8069 	SKIP_BLANKS;
8070 	val = xmlParseAttValue(ctxt);
8071 	ctxt->instate = XML_PARSER_CONTENT;
8072     } else {
8073 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8074 	       "Specification mandate value for attribute %s\n", name);
8075 	return(NULL);
8076     }
8077 
8078     /*
8079      * Check that xml:lang conforms to the specification
8080      * No more registered as an error, just generate a warning now
8081      * since this was deprecated in XML second edition
8082      */
8083     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8084 	if (!xmlCheckLanguageID(val)) {
8085 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8086 		          "Malformed value for xml:lang : %s\n",
8087 			  val, NULL);
8088 	}
8089     }
8090 
8091     /*
8092      * Check that xml:space conforms to the specification
8093      */
8094     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8095 	if (xmlStrEqual(val, BAD_CAST "default"))
8096 	    *(ctxt->space) = 0;
8097 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8098 	    *(ctxt->space) = 1;
8099 	else {
8100 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8101 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8102                                  val, NULL);
8103 	}
8104     }
8105 
8106     *value = val;
8107     return(name);
8108 }
8109 
8110 /**
8111  * xmlParseStartTag:
8112  * @ctxt:  an XML parser context
8113  *
8114  * parse a start of tag either for rule element or
8115  * EmptyElement. In both case we don't parse the tag closing chars.
8116  *
8117  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8118  *
8119  * [ WFC: Unique Att Spec ]
8120  * No attribute name may appear more than once in the same start-tag or
8121  * empty-element tag.
8122  *
8123  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8124  *
8125  * [ WFC: Unique Att Spec ]
8126  * No attribute name may appear more than once in the same start-tag or
8127  * empty-element tag.
8128  *
8129  * With namespace:
8130  *
8131  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8132  *
8133  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8134  *
8135  * Returns the element name parsed
8136  */
8137 
8138 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8139 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8140     const xmlChar *name;
8141     const xmlChar *attname;
8142     xmlChar *attvalue;
8143     const xmlChar **atts = ctxt->atts;
8144     int nbatts = 0;
8145     int maxatts = ctxt->maxatts;
8146     int i;
8147 
8148     if (RAW != '<') return(NULL);
8149     NEXT1;
8150 
8151     name = xmlParseName(ctxt);
8152     if (name == NULL) {
8153 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8154 	     "xmlParseStartTag: invalid element name\n");
8155         return(NULL);
8156     }
8157 
8158     /*
8159      * Now parse the attributes, it ends up with the ending
8160      *
8161      * (S Attribute)* S?
8162      */
8163     SKIP_BLANKS;
8164     GROW;
8165 
8166     while (((RAW != '>') &&
8167 	   ((RAW != '/') || (NXT(1) != '>')) &&
8168 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8169 	const xmlChar *q = CUR_PTR;
8170 	unsigned int cons = ctxt->input->consumed;
8171 
8172 	attname = xmlParseAttribute(ctxt, &attvalue);
8173         if ((attname != NULL) && (attvalue != NULL)) {
8174 	    /*
8175 	     * [ WFC: Unique Att Spec ]
8176 	     * No attribute name may appear more than once in the same
8177 	     * start-tag or empty-element tag.
8178 	     */
8179 	    for (i = 0; i < nbatts;i += 2) {
8180 	        if (xmlStrEqual(atts[i], attname)) {
8181 		    xmlErrAttributeDup(ctxt, NULL, attname);
8182 		    xmlFree(attvalue);
8183 		    goto failed;
8184 		}
8185 	    }
8186 	    /*
8187 	     * Add the pair to atts
8188 	     */
8189 	    if (atts == NULL) {
8190 	        maxatts = 22; /* allow for 10 attrs by default */
8191 	        atts = (const xmlChar **)
8192 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8193 		if (atts == NULL) {
8194 		    xmlErrMemory(ctxt, NULL);
8195 		    if (attvalue != NULL)
8196 			xmlFree(attvalue);
8197 		    goto failed;
8198 		}
8199 		ctxt->atts = atts;
8200 		ctxt->maxatts = maxatts;
8201 	    } else if (nbatts + 4 > maxatts) {
8202 	        const xmlChar **n;
8203 
8204 	        maxatts *= 2;
8205 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8206 					     maxatts * sizeof(const xmlChar *));
8207 		if (n == NULL) {
8208 		    xmlErrMemory(ctxt, NULL);
8209 		    if (attvalue != NULL)
8210 			xmlFree(attvalue);
8211 		    goto failed;
8212 		}
8213 		atts = n;
8214 		ctxt->atts = atts;
8215 		ctxt->maxatts = maxatts;
8216 	    }
8217 	    atts[nbatts++] = attname;
8218 	    atts[nbatts++] = attvalue;
8219 	    atts[nbatts] = NULL;
8220 	    atts[nbatts + 1] = NULL;
8221 	} else {
8222 	    if (attvalue != NULL)
8223 		xmlFree(attvalue);
8224 	}
8225 
8226 failed:
8227 
8228 	GROW
8229 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8230 	    break;
8231 	if (!IS_BLANK_CH(RAW)) {
8232 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8233 			   "attributes construct error\n");
8234 	}
8235 	SKIP_BLANKS;
8236         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8237             (attname == NULL) && (attvalue == NULL)) {
8238 	    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8239 			   "xmlParseStartTag: problem parsing attributes\n");
8240 	    break;
8241 	}
8242 	SHRINK;
8243         GROW;
8244     }
8245 
8246     /*
8247      * SAX: Start of Element !
8248      */
8249     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8250 	(!ctxt->disableSAX)) {
8251 	if (nbatts > 0)
8252 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8253 	else
8254 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8255     }
8256 
8257     if (atts != NULL) {
8258         /* Free only the content strings */
8259         for (i = 1;i < nbatts;i+=2)
8260 	    if (atts[i] != NULL)
8261 	       xmlFree((xmlChar *) atts[i]);
8262     }
8263     return(name);
8264 }
8265 
8266 /**
8267  * xmlParseEndTag1:
8268  * @ctxt:  an XML parser context
8269  * @line:  line of the start tag
8270  * @nsNr:  number of namespaces on the start tag
8271  *
8272  * parse an end of tag
8273  *
8274  * [42] ETag ::= '</' Name S? '>'
8275  *
8276  * With namespace
8277  *
8278  * [NS 9] ETag ::= '</' QName S? '>'
8279  */
8280 
8281 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8282 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8283     const xmlChar *name;
8284 
8285     GROW;
8286     if ((RAW != '<') || (NXT(1) != '/')) {
8287 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8288 		       "xmlParseEndTag: '</' not found\n");
8289 	return;
8290     }
8291     SKIP(2);
8292 
8293     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8294 
8295     /*
8296      * We should definitely be at the ending "S? '>'" part
8297      */
8298     GROW;
8299     SKIP_BLANKS;
8300     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8301 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8302     } else
8303 	NEXT1;
8304 
8305     /*
8306      * [ WFC: Element Type Match ]
8307      * The Name in an element's end-tag must match the element type in the
8308      * start-tag.
8309      *
8310      */
8311     if (name != (xmlChar*)1) {
8312         if (name == NULL) name = BAD_CAST "unparseable";
8313         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8314 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8315 		                ctxt->name, line, name);
8316     }
8317 
8318     /*
8319      * SAX: End of Tag
8320      */
8321     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8322 	(!ctxt->disableSAX))
8323         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8324 
8325     namePop(ctxt);
8326     spacePop(ctxt);
8327     return;
8328 }
8329 
8330 /**
8331  * xmlParseEndTag:
8332  * @ctxt:  an XML parser context
8333  *
8334  * parse an end of tag
8335  *
8336  * [42] ETag ::= '</' Name S? '>'
8337  *
8338  * With namespace
8339  *
8340  * [NS 9] ETag ::= '</' QName S? '>'
8341  */
8342 
8343 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8344 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8345     xmlParseEndTag1(ctxt, 0);
8346 }
8347 #endif /* LIBXML_SAX1_ENABLED */
8348 
8349 /************************************************************************
8350  *									*
8351  *		      SAX 2 specific operations				*
8352  *									*
8353  ************************************************************************/
8354 
8355 /*
8356  * xmlGetNamespace:
8357  * @ctxt:  an XML parser context
8358  * @prefix:  the prefix to lookup
8359  *
8360  * Lookup the namespace name for the @prefix (which ca be NULL)
8361  * The prefix must come from the @ctxt->dict dictionnary
8362  *
8363  * Returns the namespace name or NULL if not bound
8364  */
8365 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8366 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8367     int i;
8368 
8369     if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8370     for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8371         if (ctxt->nsTab[i] == prefix) {
8372 	    if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8373 	        return(NULL);
8374 	    return(ctxt->nsTab[i + 1]);
8375 	}
8376     return(NULL);
8377 }
8378 
8379 /**
8380  * xmlParseQName:
8381  * @ctxt:  an XML parser context
8382  * @prefix:  pointer to store the prefix part
8383  *
8384  * parse an XML Namespace QName
8385  *
8386  * [6]  QName  ::= (Prefix ':')? LocalPart
8387  * [7]  Prefix  ::= NCName
8388  * [8]  LocalPart  ::= NCName
8389  *
8390  * Returns the Name parsed or NULL
8391  */
8392 
8393 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8394 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8395     const xmlChar *l, *p;
8396 
8397     GROW;
8398 
8399     l = xmlParseNCName(ctxt);
8400     if (l == NULL) {
8401         if (CUR == ':') {
8402 	    l = xmlParseName(ctxt);
8403 	    if (l != NULL) {
8404 	        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8405 		         "Failed to parse QName '%s'\n", l, NULL, NULL);
8406 		*prefix = NULL;
8407 		return(l);
8408 	    }
8409 	}
8410         return(NULL);
8411     }
8412     if (CUR == ':') {
8413         NEXT;
8414 	p = l;
8415 	l = xmlParseNCName(ctxt);
8416 	if (l == NULL) {
8417 	    xmlChar *tmp;
8418 
8419             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8420 	             "Failed to parse QName '%s:'\n", p, NULL, NULL);
8421 	    l = xmlParseNmtoken(ctxt);
8422 	    if (l == NULL)
8423 		tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8424 	    else {
8425 		tmp = xmlBuildQName(l, p, NULL, 0);
8426 		xmlFree((char *)l);
8427 	    }
8428 	    p = xmlDictLookup(ctxt->dict, tmp, -1);
8429 	    if (tmp != NULL) xmlFree(tmp);
8430 	    *prefix = NULL;
8431 	    return(p);
8432 	}
8433 	if (CUR == ':') {
8434 	    xmlChar *tmp;
8435 
8436             xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8437 	             "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8438 	    NEXT;
8439 	    tmp = (xmlChar *) xmlParseName(ctxt);
8440 	    if (tmp != NULL) {
8441 	        tmp = xmlBuildQName(tmp, l, NULL, 0);
8442 		l = xmlDictLookup(ctxt->dict, tmp, -1);
8443 		if (tmp != NULL) xmlFree(tmp);
8444 		*prefix = p;
8445 		return(l);
8446 	    }
8447 	    tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8448 	    l = xmlDictLookup(ctxt->dict, tmp, -1);
8449 	    if (tmp != NULL) xmlFree(tmp);
8450 	    *prefix = p;
8451 	    return(l);
8452 	}
8453 	*prefix = p;
8454     } else
8455         *prefix = NULL;
8456     return(l);
8457 }
8458 
8459 /**
8460  * xmlParseQNameAndCompare:
8461  * @ctxt:  an XML parser context
8462  * @name:  the localname
8463  * @prefix:  the prefix, if any.
8464  *
8465  * parse an XML name and compares for match
8466  * (specialized for endtag parsing)
8467  *
8468  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8469  * and the name for mismatch
8470  */
8471 
8472 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8473 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8474                         xmlChar const *prefix) {
8475     const xmlChar *cmp;
8476     const xmlChar *in;
8477     const xmlChar *ret;
8478     const xmlChar *prefix2;
8479 
8480     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8481 
8482     GROW;
8483     in = ctxt->input->cur;
8484 
8485     cmp = prefix;
8486     while (*in != 0 && *in == *cmp) {
8487     	++in;
8488 	++cmp;
8489     }
8490     if ((*cmp == 0) && (*in == ':')) {
8491         in++;
8492 	cmp = name;
8493 	while (*in != 0 && *in == *cmp) {
8494 	    ++in;
8495 	    ++cmp;
8496 	}
8497 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8498 	    /* success */
8499 	    ctxt->input->cur = in;
8500 	    return((const xmlChar*) 1);
8501 	}
8502     }
8503     /*
8504      * all strings coms from the dictionary, equality can be done directly
8505      */
8506     ret = xmlParseQName (ctxt, &prefix2);
8507     if ((ret == name) && (prefix == prefix2))
8508 	return((const xmlChar*) 1);
8509     return ret;
8510 }
8511 
8512 /**
8513  * xmlParseAttValueInternal:
8514  * @ctxt:  an XML parser context
8515  * @len:  attribute len result
8516  * @alloc:  whether the attribute was reallocated as a new string
8517  * @normalize:  if 1 then further non-CDATA normalization must be done
8518  *
8519  * parse a value for an attribute.
8520  * NOTE: if no normalization is needed, the routine will return pointers
8521  *       directly from the data buffer.
8522  *
8523  * 3.3.3 Attribute-Value Normalization:
8524  * Before the value of an attribute is passed to the application or
8525  * checked for validity, the XML processor must normalize it as follows:
8526  * - a character reference is processed by appending the referenced
8527  *   character to the attribute value
8528  * - an entity reference is processed by recursively processing the
8529  *   replacement text of the entity
8530  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8531  *   appending #x20 to the normalized value, except that only a single
8532  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8533  *   parsed entity or the literal entity value of an internal parsed entity
8534  * - other characters are processed by appending them to the normalized value
8535  * If the declared value is not CDATA, then the XML processor must further
8536  * process the normalized attribute value by discarding any leading and
8537  * trailing space (#x20) characters, and by replacing sequences of space
8538  * (#x20) characters by a single space (#x20) character.
8539  * All attributes for which no declaration has been read should be treated
8540  * by a non-validating parser as if declared CDATA.
8541  *
8542  * Returns the AttValue parsed or NULL. The value has to be freed by the
8543  *     caller if it was copied, this can be detected by val[*len] == 0.
8544  */
8545 
8546 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8547 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8548                          int normalize)
8549 {
8550     xmlChar limit = 0;
8551     const xmlChar *in = NULL, *start, *end, *last;
8552     xmlChar *ret = NULL;
8553 
8554     GROW;
8555     in = (xmlChar *) CUR_PTR;
8556     if (*in != '"' && *in != '\'') {
8557         xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8558         return (NULL);
8559     }
8560     ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8561 
8562     /*
8563      * try to handle in this routine the most common case where no
8564      * allocation of a new string is required and where content is
8565      * pure ASCII.
8566      */
8567     limit = *in++;
8568     end = ctxt->input->end;
8569     start = in;
8570     if (in >= end) {
8571         const xmlChar *oldbase = ctxt->input->base;
8572 	GROW;
8573 	if (oldbase != ctxt->input->base) {
8574 	    long delta = ctxt->input->base - oldbase;
8575 	    start = start + delta;
8576 	    in = in + delta;
8577 	}
8578 	end = ctxt->input->end;
8579     }
8580     if (normalize) {
8581         /*
8582 	 * Skip any leading spaces
8583 	 */
8584 	while ((in < end) && (*in != limit) &&
8585 	       ((*in == 0x20) || (*in == 0x9) ||
8586 	        (*in == 0xA) || (*in == 0xD))) {
8587 	    in++;
8588 	    start = in;
8589 	    if (in >= end) {
8590 		const xmlChar *oldbase = ctxt->input->base;
8591 		GROW;
8592                 if (ctxt->instate == XML_PARSER_EOF)
8593                     return(NULL);
8594 		if (oldbase != ctxt->input->base) {
8595 		    long delta = ctxt->input->base - oldbase;
8596 		    start = start + delta;
8597 		    in = in + delta;
8598 		}
8599 		end = ctxt->input->end;
8600 	    }
8601 	}
8602 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8603 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8604 	    if ((*in++ == 0x20) && (*in == 0x20)) break;
8605 	    if (in >= end) {
8606 		const xmlChar *oldbase = ctxt->input->base;
8607 		GROW;
8608                 if (ctxt->instate == XML_PARSER_EOF)
8609                     return(NULL);
8610 		if (oldbase != ctxt->input->base) {
8611 		    long delta = ctxt->input->base - oldbase;
8612 		    start = start + delta;
8613 		    in = in + delta;
8614 		}
8615 		end = ctxt->input->end;
8616 	    }
8617 	}
8618 	last = in;
8619 	/*
8620 	 * skip the trailing blanks
8621 	 */
8622 	while ((last[-1] == 0x20) && (last > start)) last--;
8623 	while ((in < end) && (*in != limit) &&
8624 	       ((*in == 0x20) || (*in == 0x9) ||
8625 	        (*in == 0xA) || (*in == 0xD))) {
8626 	    in++;
8627 	    if (in >= end) {
8628 		const xmlChar *oldbase = ctxt->input->base;
8629 		GROW;
8630                 if (ctxt->instate == XML_PARSER_EOF)
8631                     return(NULL);
8632 		if (oldbase != ctxt->input->base) {
8633 		    long delta = ctxt->input->base - oldbase;
8634 		    start = start + delta;
8635 		    in = in + delta;
8636 		    last = last + delta;
8637 		}
8638 		end = ctxt->input->end;
8639 	    }
8640 	}
8641 	if (*in != limit) goto need_complex;
8642     } else {
8643 	while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8644 	       (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8645 	    in++;
8646 	    if (in >= end) {
8647 		const xmlChar *oldbase = ctxt->input->base;
8648 		GROW;
8649                 if (ctxt->instate == XML_PARSER_EOF)
8650                     return(NULL);
8651 		if (oldbase != ctxt->input->base) {
8652 		    long delta = ctxt->input->base - oldbase;
8653 		    start = start + delta;
8654 		    in = in + delta;
8655 		}
8656 		end = ctxt->input->end;
8657 	    }
8658 	}
8659 	last = in;
8660 	if (*in != limit) goto need_complex;
8661     }
8662     in++;
8663     if (len != NULL) {
8664         *len = last - start;
8665         ret = (xmlChar *) start;
8666     } else {
8667         if (alloc) *alloc = 1;
8668         ret = xmlStrndup(start, last - start);
8669     }
8670     CUR_PTR = in;
8671     if (alloc) *alloc = 0;
8672     return ret;
8673 need_complex:
8674     if (alloc) *alloc = 1;
8675     return xmlParseAttValueComplex(ctxt, len, normalize);
8676 }
8677 
8678 /**
8679  * xmlParseAttribute2:
8680  * @ctxt:  an XML parser context
8681  * @pref:  the element prefix
8682  * @elem:  the element name
8683  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8684  * @value:  a xmlChar ** used to store the value of the attribute
8685  * @len:  an int * to save the length of the attribute
8686  * @alloc:  an int * to indicate if the attribute was allocated
8687  *
8688  * parse an attribute in the new SAX2 framework.
8689  *
8690  * Returns the attribute name, and the value in *value, .
8691  */
8692 
8693 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8694 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8695                    const xmlChar * pref, const xmlChar * elem,
8696                    const xmlChar ** prefix, xmlChar ** value,
8697                    int *len, int *alloc)
8698 {
8699     const xmlChar *name;
8700     xmlChar *val, *internal_val = NULL;
8701     int normalize = 0;
8702 
8703     *value = NULL;
8704     GROW;
8705     name = xmlParseQName(ctxt, prefix);
8706     if (name == NULL) {
8707         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8708                        "error parsing attribute name\n");
8709         return (NULL);
8710     }
8711 
8712     /*
8713      * get the type if needed
8714      */
8715     if (ctxt->attsSpecial != NULL) {
8716         int type;
8717 
8718         type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8719                                             pref, elem, *prefix, name);
8720         if (type != 0)
8721             normalize = 1;
8722     }
8723 
8724     /*
8725      * read the value
8726      */
8727     SKIP_BLANKS;
8728     if (RAW == '=') {
8729         NEXT;
8730         SKIP_BLANKS;
8731         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8732 	if (normalize) {
8733 	    /*
8734 	     * Sometimes a second normalisation pass for spaces is needed
8735 	     * but that only happens if charrefs or entities refernces
8736 	     * have been used in the attribute value, i.e. the attribute
8737 	     * value have been extracted in an allocated string already.
8738 	     */
8739 	    if (*alloc) {
8740 	        const xmlChar *val2;
8741 
8742 	        val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8743 		if ((val2 != NULL) && (val2 != val)) {
8744 		    xmlFree(val);
8745 		    val = (xmlChar *) val2;
8746 		}
8747 	    }
8748 	}
8749         ctxt->instate = XML_PARSER_CONTENT;
8750     } else {
8751         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8752                           "Specification mandate value for attribute %s\n",
8753                           name);
8754         return (NULL);
8755     }
8756 
8757     if (*prefix == ctxt->str_xml) {
8758         /*
8759          * Check that xml:lang conforms to the specification
8760          * No more registered as an error, just generate a warning now
8761          * since this was deprecated in XML second edition
8762          */
8763         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8764             internal_val = xmlStrndup(val, *len);
8765             if (!xmlCheckLanguageID(internal_val)) {
8766                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8767                               "Malformed value for xml:lang : %s\n",
8768                               internal_val, NULL);
8769             }
8770         }
8771 
8772         /*
8773          * Check that xml:space conforms to the specification
8774          */
8775         if (xmlStrEqual(name, BAD_CAST "space")) {
8776             internal_val = xmlStrndup(val, *len);
8777             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8778                 *(ctxt->space) = 0;
8779             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8780                 *(ctxt->space) = 1;
8781             else {
8782                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8783                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8784                               internal_val, NULL);
8785             }
8786         }
8787         if (internal_val) {
8788             xmlFree(internal_val);
8789         }
8790     }
8791 
8792     *value = val;
8793     return (name);
8794 }
8795 /**
8796  * xmlParseStartTag2:
8797  * @ctxt:  an XML parser context
8798  *
8799  * parse a start of tag either for rule element or
8800  * EmptyElement. In both case we don't parse the tag closing chars.
8801  * This routine is called when running SAX2 parsing
8802  *
8803  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8804  *
8805  * [ WFC: Unique Att Spec ]
8806  * No attribute name may appear more than once in the same start-tag or
8807  * empty-element tag.
8808  *
8809  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8810  *
8811  * [ WFC: Unique Att Spec ]
8812  * No attribute name may appear more than once in the same start-tag or
8813  * empty-element tag.
8814  *
8815  * With namespace:
8816  *
8817  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8818  *
8819  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8820  *
8821  * Returns the element name parsed
8822  */
8823 
8824 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8825 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8826                   const xmlChar **URI, int *tlen) {
8827     const xmlChar *localname;
8828     const xmlChar *prefix;
8829     const xmlChar *attname;
8830     const xmlChar *aprefix;
8831     const xmlChar *nsname;
8832     xmlChar *attvalue;
8833     const xmlChar **atts = ctxt->atts;
8834     int maxatts = ctxt->maxatts;
8835     int nratts, nbatts, nbdef;
8836     int i, j, nbNs, attval, oldline, oldcol;
8837     const xmlChar *base;
8838     unsigned long cur;
8839     int nsNr = ctxt->nsNr;
8840 
8841     if (RAW != '<') return(NULL);
8842     NEXT1;
8843 
8844     /*
8845      * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8846      *       point since the attribute values may be stored as pointers to
8847      *       the buffer and calling SHRINK would destroy them !
8848      *       The Shrinking is only possible once the full set of attribute
8849      *       callbacks have been done.
8850      */
8851 reparse:
8852     SHRINK;
8853     base = ctxt->input->base;
8854     cur = ctxt->input->cur - ctxt->input->base;
8855     oldline = ctxt->input->line;
8856     oldcol = ctxt->input->col;
8857     nbatts = 0;
8858     nratts = 0;
8859     nbdef = 0;
8860     nbNs = 0;
8861     attval = 0;
8862     /* Forget any namespaces added during an earlier parse of this element. */
8863     ctxt->nsNr = nsNr;
8864 
8865     localname = xmlParseQName(ctxt, &prefix);
8866     if (localname == NULL) {
8867 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8868 		       "StartTag: invalid element name\n");
8869         return(NULL);
8870     }
8871     *tlen = ctxt->input->cur - ctxt->input->base - cur;
8872 
8873     /*
8874      * Now parse the attributes, it ends up with the ending
8875      *
8876      * (S Attribute)* S?
8877      */
8878     SKIP_BLANKS;
8879     GROW;
8880     if (ctxt->input->base != base) goto base_changed;
8881 
8882     while (((RAW != '>') &&
8883 	   ((RAW != '/') || (NXT(1) != '>')) &&
8884 	   (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8885 	const xmlChar *q = CUR_PTR;
8886 	unsigned int cons = ctxt->input->consumed;
8887 	int len = -1, alloc = 0;
8888 
8889 	attname = xmlParseAttribute2(ctxt, prefix, localname,
8890 	                             &aprefix, &attvalue, &len, &alloc);
8891 	if (ctxt->input->base != base) {
8892 	    if ((attvalue != NULL) && (alloc != 0))
8893 	        xmlFree(attvalue);
8894 	    attvalue = NULL;
8895 	    goto base_changed;
8896 	}
8897         if ((attname != NULL) && (attvalue != NULL)) {
8898 	    if (len < 0) len = xmlStrlen(attvalue);
8899             if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8900 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8901 		xmlURIPtr uri;
8902 
8903                 if (*URL != 0) {
8904 		    uri = xmlParseURI((const char *) URL);
8905 		    if (uri == NULL) {
8906 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8907 			         "xmlns: '%s' is not a valid URI\n",
8908 					   URL, NULL, NULL);
8909 		    } else {
8910 			if (uri->scheme == NULL) {
8911 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8912 				      "xmlns: URI %s is not absolute\n",
8913 				      URL, NULL, NULL);
8914 			}
8915 			xmlFreeURI(uri);
8916 		    }
8917 		    if (URL == ctxt->str_xml_ns) {
8918 			if (attname != ctxt->str_xml) {
8919 			    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8920 			 "xml namespace URI cannot be the default namespace\n",
8921 				     NULL, NULL, NULL);
8922 			}
8923 			goto skip_default_ns;
8924 		    }
8925 		    if ((len == 29) &&
8926 			(xmlStrEqual(URL,
8927 				 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8928 			xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8929 			     "reuse of the xmlns namespace name is forbidden\n",
8930 				 NULL, NULL, NULL);
8931 			goto skip_default_ns;
8932 		    }
8933 		}
8934 		/*
8935 		 * check that it's not a defined namespace
8936 		 */
8937 		for (j = 1;j <= nbNs;j++)
8938 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8939 			break;
8940 		if (j <= nbNs)
8941 		    xmlErrAttributeDup(ctxt, NULL, attname);
8942 		else
8943 		    if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8944 skip_default_ns:
8945 		if (alloc != 0) xmlFree(attvalue);
8946 		SKIP_BLANKS;
8947 		continue;
8948 	    }
8949             if (aprefix == ctxt->str_xmlns) {
8950 	        const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8951 		xmlURIPtr uri;
8952 
8953                 if (attname == ctxt->str_xml) {
8954 		    if (URL != ctxt->str_xml_ns) {
8955 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8956 			         "xml namespace prefix mapped to wrong URI\n",
8957 			         NULL, NULL, NULL);
8958 		    }
8959 		    /*
8960 		     * Do not keep a namespace definition node
8961 		     */
8962 		    goto skip_ns;
8963 		}
8964                 if (URL == ctxt->str_xml_ns) {
8965 		    if (attname != ctxt->str_xml) {
8966 		        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967 			         "xml namespace URI mapped to wrong prefix\n",
8968 			         NULL, NULL, NULL);
8969 		    }
8970 		    goto skip_ns;
8971 		}
8972                 if (attname == ctxt->str_xmlns) {
8973 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8974 			     "redefinition of the xmlns prefix is forbidden\n",
8975 			     NULL, NULL, NULL);
8976 		    goto skip_ns;
8977 		}
8978 		if ((len == 29) &&
8979 		    (xmlStrEqual(URL,
8980 		                 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8981 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8982 			     "reuse of the xmlns namespace name is forbidden\n",
8983 			     NULL, NULL, NULL);
8984 		    goto skip_ns;
8985 		}
8986 		if ((URL == NULL) || (URL[0] == 0)) {
8987 		    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8988 		             "xmlns:%s: Empty XML namespace is not allowed\n",
8989 			          attname, NULL, NULL);
8990 		    goto skip_ns;
8991 		} else {
8992 		    uri = xmlParseURI((const char *) URL);
8993 		    if (uri == NULL) {
8994 			xmlNsErr(ctxt, XML_WAR_NS_URI,
8995 			     "xmlns:%s: '%s' is not a valid URI\n",
8996 					   attname, URL, NULL);
8997 		    } else {
8998 			if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8999 			    xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9000 				      "xmlns:%s: URI %s is not absolute\n",
9001 				      attname, URL, NULL);
9002 			}
9003 			xmlFreeURI(uri);
9004 		    }
9005 		}
9006 
9007 		/*
9008 		 * check that it's not a defined namespace
9009 		 */
9010 		for (j = 1;j <= nbNs;j++)
9011 		    if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9012 			break;
9013 		if (j <= nbNs)
9014 		    xmlErrAttributeDup(ctxt, aprefix, attname);
9015 		else
9016 		    if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9017 skip_ns:
9018 		if (alloc != 0) xmlFree(attvalue);
9019 		SKIP_BLANKS;
9020 		if (ctxt->input->base != base) goto base_changed;
9021 		continue;
9022 	    }
9023 
9024 	    /*
9025 	     * Add the pair to atts
9026 	     */
9027 	    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9028 	        if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9029 		    if (attvalue[len] == 0)
9030 			xmlFree(attvalue);
9031 		    goto failed;
9032 		}
9033 	        maxatts = ctxt->maxatts;
9034 		atts = ctxt->atts;
9035 	    }
9036 	    ctxt->attallocs[nratts++] = alloc;
9037 	    atts[nbatts++] = attname;
9038 	    atts[nbatts++] = aprefix;
9039 	    atts[nbatts++] = NULL; /* the URI will be fetched later */
9040 	    atts[nbatts++] = attvalue;
9041 	    attvalue += len;
9042 	    atts[nbatts++] = attvalue;
9043 	    /*
9044 	     * tag if some deallocation is needed
9045 	     */
9046 	    if (alloc != 0) attval = 1;
9047 	} else {
9048 	    if ((attvalue != NULL) && (attvalue[len] == 0))
9049 		xmlFree(attvalue);
9050 	}
9051 
9052 failed:
9053 
9054 	GROW
9055         if (ctxt->instate == XML_PARSER_EOF)
9056             break;
9057 	if (ctxt->input->base != base) goto base_changed;
9058 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9059 	    break;
9060 	if (!IS_BLANK_CH(RAW)) {
9061 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9062 			   "attributes construct error\n");
9063 	    break;
9064 	}
9065 	SKIP_BLANKS;
9066         if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9067             (attname == NULL) && (attvalue == NULL)) {
9068 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9069 	         "xmlParseStartTag: problem parsing attributes\n");
9070 	    break;
9071 	}
9072         GROW;
9073 	if (ctxt->input->base != base) goto base_changed;
9074     }
9075 
9076     /*
9077      * The attributes defaulting
9078      */
9079     if (ctxt->attsDefault != NULL) {
9080         xmlDefAttrsPtr defaults;
9081 
9082 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9083 	if (defaults != NULL) {
9084 	    for (i = 0;i < defaults->nbAttrs;i++) {
9085 	        attname = defaults->values[5 * i];
9086 		aprefix = defaults->values[5 * i + 1];
9087 
9088                 /*
9089 		 * special work for namespaces defaulted defs
9090 		 */
9091 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9092 		    /*
9093 		     * check that it's not a defined namespace
9094 		     */
9095 		    for (j = 1;j <= nbNs;j++)
9096 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9097 			    break;
9098 	            if (j <= nbNs) continue;
9099 
9100 		    nsname = xmlGetNamespace(ctxt, NULL);
9101 		    if (nsname != defaults->values[5 * i + 2]) {
9102 			if (nsPush(ctxt, NULL,
9103 			           defaults->values[5 * i + 2]) > 0)
9104 			    nbNs++;
9105 		    }
9106 		} else if (aprefix == ctxt->str_xmlns) {
9107 		    /*
9108 		     * check that it's not a defined namespace
9109 		     */
9110 		    for (j = 1;j <= nbNs;j++)
9111 		        if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9112 			    break;
9113 	            if (j <= nbNs) continue;
9114 
9115 		    nsname = xmlGetNamespace(ctxt, attname);
9116 		    if (nsname != defaults->values[2]) {
9117 			if (nsPush(ctxt, attname,
9118 			           defaults->values[5 * i + 2]) > 0)
9119 			    nbNs++;
9120 		    }
9121 		} else {
9122 		    /*
9123 		     * check that it's not a defined attribute
9124 		     */
9125 		    for (j = 0;j < nbatts;j+=5) {
9126 			if ((attname == atts[j]) && (aprefix == atts[j+1]))
9127 			    break;
9128 		    }
9129 		    if (j < nbatts) continue;
9130 
9131 		    if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9132 			if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9133 			    return(NULL);
9134 			}
9135 			maxatts = ctxt->maxatts;
9136 			atts = ctxt->atts;
9137 		    }
9138 		    atts[nbatts++] = attname;
9139 		    atts[nbatts++] = aprefix;
9140 		    if (aprefix == NULL)
9141 			atts[nbatts++] = NULL;
9142 		    else
9143 		        atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9144 		    atts[nbatts++] = defaults->values[5 * i + 2];
9145 		    atts[nbatts++] = defaults->values[5 * i + 3];
9146 		    if ((ctxt->standalone == 1) &&
9147 		        (defaults->values[5 * i + 4] != NULL)) {
9148 			xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9149 	  "standalone: attribute %s on %s defaulted from external subset\n",
9150 	                                 attname, localname);
9151 		    }
9152 		    nbdef++;
9153 		}
9154 	    }
9155 	}
9156     }
9157 
9158     /*
9159      * The attributes checkings
9160      */
9161     for (i = 0; i < nbatts;i += 5) {
9162         /*
9163 	* The default namespace does not apply to attribute names.
9164 	*/
9165 	if (atts[i + 1] != NULL) {
9166 	    nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9167 	    if (nsname == NULL) {
9168 		xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9169 		    "Namespace prefix %s for %s on %s is not defined\n",
9170 		    atts[i + 1], atts[i], localname);
9171 	    }
9172 	    atts[i + 2] = nsname;
9173 	} else
9174 	    nsname = NULL;
9175 	/*
9176 	 * [ WFC: Unique Att Spec ]
9177 	 * No attribute name may appear more than once in the same
9178 	 * start-tag or empty-element tag.
9179 	 * As extended by the Namespace in XML REC.
9180 	 */
9181         for (j = 0; j < i;j += 5) {
9182 	    if (atts[i] == atts[j]) {
9183 	        if (atts[i+1] == atts[j+1]) {
9184 		    xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9185 		    break;
9186 		}
9187 		if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9188 		    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9189 			     "Namespaced Attribute %s in '%s' redefined\n",
9190 			     atts[i], nsname, NULL);
9191 		    break;
9192 		}
9193 	    }
9194 	}
9195     }
9196 
9197     nsname = xmlGetNamespace(ctxt, prefix);
9198     if ((prefix != NULL) && (nsname == NULL)) {
9199 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9200 	         "Namespace prefix %s on %s is not defined\n",
9201 		 prefix, localname, NULL);
9202     }
9203     *pref = prefix;
9204     *URI = nsname;
9205 
9206     /*
9207      * SAX: Start of Element !
9208      */
9209     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9210 	(!ctxt->disableSAX)) {
9211 	if (nbNs > 0)
9212 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9213 			  nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9214 			  nbatts / 5, nbdef, atts);
9215 	else
9216 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9217 	                  nsname, 0, NULL, nbatts / 5, nbdef, atts);
9218     }
9219 
9220     /*
9221      * Free up attribute allocated strings if needed
9222      */
9223     if (attval != 0) {
9224 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9225 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9226 	        xmlFree((xmlChar *) atts[i]);
9227     }
9228 
9229     return(localname);
9230 
9231 base_changed:
9232     /*
9233      * the attribute strings are valid iif the base didn't changed
9234      */
9235     if (attval != 0) {
9236 	for (i = 3,j = 0; j < nratts;i += 5,j++)
9237 	    if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9238 	        xmlFree((xmlChar *) atts[i]);
9239     }
9240     ctxt->input->cur = ctxt->input->base + cur;
9241     ctxt->input->line = oldline;
9242     ctxt->input->col = oldcol;
9243     if (ctxt->wellFormed == 1) {
9244 	goto reparse;
9245     }
9246     return(NULL);
9247 }
9248 
9249 /**
9250  * xmlParseEndTag2:
9251  * @ctxt:  an XML parser context
9252  * @line:  line of the start tag
9253  * @nsNr:  number of namespaces on the start tag
9254  *
9255  * parse an end of tag
9256  *
9257  * [42] ETag ::= '</' Name S? '>'
9258  *
9259  * With namespace
9260  *
9261  * [NS 9] ETag ::= '</' QName S? '>'
9262  */
9263 
9264 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9265 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9266                 const xmlChar *URI, int line, int nsNr, int tlen) {
9267     const xmlChar *name;
9268 
9269     GROW;
9270     if ((RAW != '<') || (NXT(1) != '/')) {
9271 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9272 	return;
9273     }
9274     SKIP(2);
9275 
9276     if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9277         if (ctxt->input->cur[tlen] == '>') {
9278 	    ctxt->input->cur += tlen + 1;
9279 	    goto done;
9280 	}
9281 	ctxt->input->cur += tlen;
9282 	name = (xmlChar*)1;
9283     } else {
9284 	if (prefix == NULL)
9285 	    name = xmlParseNameAndCompare(ctxt, ctxt->name);
9286 	else
9287 	    name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9288     }
9289 
9290     /*
9291      * We should definitely be at the ending "S? '>'" part
9292      */
9293     GROW;
9294     if (ctxt->instate == XML_PARSER_EOF)
9295         return;
9296     SKIP_BLANKS;
9297     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9298 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9299     } else
9300 	NEXT1;
9301 
9302     /*
9303      * [ WFC: Element Type Match ]
9304      * The Name in an element's end-tag must match the element type in the
9305      * start-tag.
9306      *
9307      */
9308     if (name != (xmlChar*)1) {
9309         if (name == NULL) name = BAD_CAST "unparseable";
9310         if ((line == 0) && (ctxt->node != NULL))
9311             line = ctxt->node->line;
9312         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9313 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9314 		                ctxt->name, line, name);
9315     }
9316 
9317     /*
9318      * SAX: End of Tag
9319      */
9320 done:
9321     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9322 	(!ctxt->disableSAX))
9323 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9324 
9325     spacePop(ctxt);
9326     if (nsNr != 0)
9327 	nsPop(ctxt, nsNr);
9328     return;
9329 }
9330 
9331 /**
9332  * xmlParseCDSect:
9333  * @ctxt:  an XML parser context
9334  *
9335  * Parse escaped pure raw content.
9336  *
9337  * [18] CDSect ::= CDStart CData CDEnd
9338  *
9339  * [19] CDStart ::= '<![CDATA['
9340  *
9341  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9342  *
9343  * [21] CDEnd ::= ']]>'
9344  */
9345 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9346 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9347     xmlChar *buf = NULL;
9348     int len = 0;
9349     int size = XML_PARSER_BUFFER_SIZE;
9350     int r, rl;
9351     int	s, sl;
9352     int cur, l;
9353     int count = 0;
9354 
9355     /* Check 2.6.0 was NXT(0) not RAW */
9356     if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9357 	SKIP(9);
9358     } else
9359         return;
9360 
9361     ctxt->instate = XML_PARSER_CDATA_SECTION;
9362     r = CUR_CHAR(rl);
9363     if (!IS_CHAR(r)) {
9364 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9365 	ctxt->instate = XML_PARSER_CONTENT;
9366         return;
9367     }
9368     NEXTL(rl);
9369     s = CUR_CHAR(sl);
9370     if (!IS_CHAR(s)) {
9371 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9372 	ctxt->instate = XML_PARSER_CONTENT;
9373         return;
9374     }
9375     NEXTL(sl);
9376     cur = CUR_CHAR(l);
9377     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9378     if (buf == NULL) {
9379 	xmlErrMemory(ctxt, NULL);
9380 	return;
9381     }
9382     while (IS_CHAR(cur) &&
9383            ((r != ']') || (s != ']') || (cur != '>'))) {
9384 	if (len + 5 >= size) {
9385 	    xmlChar *tmp;
9386 
9387 	    size *= 2;
9388 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9389 	    if (tmp == NULL) {
9390 	        xmlFree(buf);
9391 		xmlErrMemory(ctxt, NULL);
9392 		return;
9393 	    }
9394 	    buf = tmp;
9395 	}
9396 	COPY_BUF(rl,buf,len,r);
9397 	r = s;
9398 	rl = sl;
9399 	s = cur;
9400 	sl = l;
9401 	count++;
9402 	if (count > 50) {
9403 	    GROW;
9404             if (ctxt->instate == XML_PARSER_EOF) {
9405 		xmlFree(buf);
9406 		return;
9407             }
9408 	    count = 0;
9409 	}
9410 	NEXTL(l);
9411 	cur = CUR_CHAR(l);
9412     }
9413     buf[len] = 0;
9414     ctxt->instate = XML_PARSER_CONTENT;
9415     if (cur != '>') {
9416 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9417 	                     "CData section not finished\n%.50s\n", buf);
9418 	xmlFree(buf);
9419         return;
9420     }
9421     NEXTL(l);
9422 
9423     /*
9424      * OK the buffer is to be consumed as cdata.
9425      */
9426     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9427 	if (ctxt->sax->cdataBlock != NULL)
9428 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9429 	else if (ctxt->sax->characters != NULL)
9430 	    ctxt->sax->characters(ctxt->userData, buf, len);
9431     }
9432     xmlFree(buf);
9433 }
9434 
9435 /**
9436  * xmlParseContent:
9437  * @ctxt:  an XML parser context
9438  *
9439  * Parse a content:
9440  *
9441  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9442  */
9443 
9444 void
xmlParseContent(xmlParserCtxtPtr ctxt)9445 xmlParseContent(xmlParserCtxtPtr ctxt) {
9446     GROW;
9447     while ((RAW != 0) &&
9448 	   ((RAW != '<') || (NXT(1) != '/')) &&
9449 	   (ctxt->instate != XML_PARSER_EOF)) {
9450 	const xmlChar *test = CUR_PTR;
9451 	unsigned int cons = ctxt->input->consumed;
9452 	const xmlChar *cur = ctxt->input->cur;
9453 
9454 	/*
9455 	 * First case : a Processing Instruction.
9456 	 */
9457 	if ((*cur == '<') && (cur[1] == '?')) {
9458 	    xmlParsePI(ctxt);
9459 	}
9460 
9461 	/*
9462 	 * Second case : a CDSection
9463 	 */
9464 	/* 2.6.0 test was *cur not RAW */
9465 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9466 	    xmlParseCDSect(ctxt);
9467 	}
9468 
9469 	/*
9470 	 * Third case :  a comment
9471 	 */
9472 	else if ((*cur == '<') && (NXT(1) == '!') &&
9473 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9474 	    xmlParseComment(ctxt);
9475 	    ctxt->instate = XML_PARSER_CONTENT;
9476 	}
9477 
9478 	/*
9479 	 * Fourth case :  a sub-element.
9480 	 */
9481 	else if (*cur == '<') {
9482 	    xmlParseElement(ctxt);
9483 	}
9484 
9485 	/*
9486 	 * Fifth case : a reference. If if has not been resolved,
9487 	 *    parsing returns it's Name, create the node
9488 	 */
9489 
9490 	else if (*cur == '&') {
9491 	    xmlParseReference(ctxt);
9492 	}
9493 
9494 	/*
9495 	 * Last case, text. Note that References are handled directly.
9496 	 */
9497 	else {
9498 	    xmlParseCharData(ctxt, 0);
9499 	}
9500 
9501 	GROW;
9502 	/*
9503 	 * Pop-up of finished entities.
9504 	 */
9505 	while ((RAW == 0) && (ctxt->inputNr > 1))
9506 	    xmlPopInput(ctxt);
9507 	SHRINK;
9508 
9509 	if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9510 	    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9511 	                "detected an error in element content\n");
9512 	    ctxt->instate = XML_PARSER_EOF;
9513             break;
9514 	}
9515     }
9516 }
9517 
9518 /**
9519  * xmlParseElement:
9520  * @ctxt:  an XML parser context
9521  *
9522  * parse an XML element, this is highly recursive
9523  *
9524  * [39] element ::= EmptyElemTag | STag content ETag
9525  *
9526  * [ WFC: Element Type Match ]
9527  * The Name in an element's end-tag must match the element type in the
9528  * start-tag.
9529  *
9530  */
9531 
9532 void
xmlParseElement(xmlParserCtxtPtr ctxt)9533 xmlParseElement(xmlParserCtxtPtr ctxt) {
9534     const xmlChar *name;
9535     const xmlChar *prefix = NULL;
9536     const xmlChar *URI = NULL;
9537     xmlParserNodeInfo node_info;
9538     int line, tlen;
9539     xmlNodePtr ret;
9540     int nsNr = ctxt->nsNr;
9541 
9542     if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9543         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9544 	xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9545 		 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9546 			  xmlParserMaxDepth);
9547 	ctxt->instate = XML_PARSER_EOF;
9548 	return;
9549     }
9550 
9551     /* Capture start position */
9552     if (ctxt->record_info) {
9553         node_info.begin_pos = ctxt->input->consumed +
9554                           (CUR_PTR - ctxt->input->base);
9555 	node_info.begin_line = ctxt->input->line;
9556     }
9557 
9558     if (ctxt->spaceNr == 0)
9559 	spacePush(ctxt, -1);
9560     else if (*ctxt->space == -2)
9561 	spacePush(ctxt, -1);
9562     else
9563 	spacePush(ctxt, *ctxt->space);
9564 
9565     line = ctxt->input->line;
9566 #ifdef LIBXML_SAX1_ENABLED
9567     if (ctxt->sax2)
9568 #endif /* LIBXML_SAX1_ENABLED */
9569         name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9570 #ifdef LIBXML_SAX1_ENABLED
9571     else
9572 	name = xmlParseStartTag(ctxt);
9573 #endif /* LIBXML_SAX1_ENABLED */
9574     if (ctxt->instate == XML_PARSER_EOF)
9575 	return;
9576     if (name == NULL) {
9577 	spacePop(ctxt);
9578         return;
9579     }
9580     namePush(ctxt, name);
9581     ret = ctxt->node;
9582 
9583 #ifdef LIBXML_VALID_ENABLED
9584     /*
9585      * [ VC: Root Element Type ]
9586      * The Name in the document type declaration must match the element
9587      * type of the root element.
9588      */
9589     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9590         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9591         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9592 #endif /* LIBXML_VALID_ENABLED */
9593 
9594     /*
9595      * Check for an Empty Element.
9596      */
9597     if ((RAW == '/') && (NXT(1) == '>')) {
9598         SKIP(2);
9599 	if (ctxt->sax2) {
9600 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9601 		(!ctxt->disableSAX))
9602 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9603 #ifdef LIBXML_SAX1_ENABLED
9604 	} else {
9605 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9606 		(!ctxt->disableSAX))
9607 		ctxt->sax->endElement(ctxt->userData, name);
9608 #endif /* LIBXML_SAX1_ENABLED */
9609 	}
9610 	namePop(ctxt);
9611 	spacePop(ctxt);
9612 	if (nsNr != ctxt->nsNr)
9613 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9614 	if ( ret != NULL && ctxt->record_info ) {
9615 	   node_info.end_pos = ctxt->input->consumed +
9616 			      (CUR_PTR - ctxt->input->base);
9617 	   node_info.end_line = ctxt->input->line;
9618 	   node_info.node = ret;
9619 	   xmlParserAddNodeInfo(ctxt, &node_info);
9620 	}
9621 	return;
9622     }
9623     if (RAW == '>') {
9624         NEXT1;
9625     } else {
9626         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9627 		     "Couldn't find end of Start Tag %s line %d\n",
9628 		                name, line, NULL);
9629 
9630 	/*
9631 	 * end of parsing of this node.
9632 	 */
9633 	nodePop(ctxt);
9634 	namePop(ctxt);
9635 	spacePop(ctxt);
9636 	if (nsNr != ctxt->nsNr)
9637 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9638 
9639 	/*
9640 	 * Capture end position and add node
9641 	 */
9642 	if ( ret != NULL && ctxt->record_info ) {
9643 	   node_info.end_pos = ctxt->input->consumed +
9644 			      (CUR_PTR - ctxt->input->base);
9645 	   node_info.end_line = ctxt->input->line;
9646 	   node_info.node = ret;
9647 	   xmlParserAddNodeInfo(ctxt, &node_info);
9648 	}
9649 	return;
9650     }
9651 
9652     /*
9653      * Parse the content of the element:
9654      */
9655     xmlParseContent(ctxt);
9656     if (ctxt->instate == XML_PARSER_EOF)
9657 	return;
9658     if (!IS_BYTE_CHAR(RAW)) {
9659         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9660 	 "Premature end of data in tag %s line %d\n",
9661 		                name, line, NULL);
9662 
9663 	/*
9664 	 * end of parsing of this node.
9665 	 */
9666 	nodePop(ctxt);
9667 	namePop(ctxt);
9668 	spacePop(ctxt);
9669 	if (nsNr != ctxt->nsNr)
9670 	    nsPop(ctxt, ctxt->nsNr - nsNr);
9671 	return;
9672     }
9673 
9674     /*
9675      * parse the end of tag: '</' should be here.
9676      */
9677     if (ctxt->sax2) {
9678 	xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9679 	namePop(ctxt);
9680     }
9681 #ifdef LIBXML_SAX1_ENABLED
9682       else
9683 	xmlParseEndTag1(ctxt, line);
9684 #endif /* LIBXML_SAX1_ENABLED */
9685 
9686     /*
9687      * Capture end position and add node
9688      */
9689     if ( ret != NULL && ctxt->record_info ) {
9690        node_info.end_pos = ctxt->input->consumed +
9691                           (CUR_PTR - ctxt->input->base);
9692        node_info.end_line = ctxt->input->line;
9693        node_info.node = ret;
9694        xmlParserAddNodeInfo(ctxt, &node_info);
9695     }
9696 }
9697 
9698 /**
9699  * xmlParseVersionNum:
9700  * @ctxt:  an XML parser context
9701  *
9702  * parse the XML version value.
9703  *
9704  * [26] VersionNum ::= '1.' [0-9]+
9705  *
9706  * In practice allow [0-9].[0-9]+ at that level
9707  *
9708  * Returns the string giving the XML version number, or NULL
9709  */
9710 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9711 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9712     xmlChar *buf = NULL;
9713     int len = 0;
9714     int size = 10;
9715     xmlChar cur;
9716 
9717     buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9718     if (buf == NULL) {
9719 	xmlErrMemory(ctxt, NULL);
9720 	return(NULL);
9721     }
9722     cur = CUR;
9723     if (!((cur >= '0') && (cur <= '9'))) {
9724 	xmlFree(buf);
9725 	return(NULL);
9726     }
9727     buf[len++] = cur;
9728     NEXT;
9729     cur=CUR;
9730     if (cur != '.') {
9731 	xmlFree(buf);
9732 	return(NULL);
9733     }
9734     buf[len++] = cur;
9735     NEXT;
9736     cur=CUR;
9737     while ((cur >= '0') && (cur <= '9')) {
9738 	if (len + 1 >= size) {
9739 	    xmlChar *tmp;
9740 
9741 	    size *= 2;
9742 	    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9743 	    if (tmp == NULL) {
9744 	        xmlFree(buf);
9745 		xmlErrMemory(ctxt, NULL);
9746 		return(NULL);
9747 	    }
9748 	    buf = tmp;
9749 	}
9750 	buf[len++] = cur;
9751 	NEXT;
9752 	cur=CUR;
9753     }
9754     buf[len] = 0;
9755     return(buf);
9756 }
9757 
9758 /**
9759  * xmlParseVersionInfo:
9760  * @ctxt:  an XML parser context
9761  *
9762  * parse the XML version.
9763  *
9764  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9765  *
9766  * [25] Eq ::= S? '=' S?
9767  *
9768  * Returns the version string, e.g. "1.0"
9769  */
9770 
9771 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9772 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9773     xmlChar *version = NULL;
9774 
9775     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9776 	SKIP(7);
9777 	SKIP_BLANKS;
9778 	if (RAW != '=') {
9779 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9780 	    return(NULL);
9781         }
9782 	NEXT;
9783 	SKIP_BLANKS;
9784 	if (RAW == '"') {
9785 	    NEXT;
9786 	    version = xmlParseVersionNum(ctxt);
9787 	    if (RAW != '"') {
9788 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9789 	    } else
9790 	        NEXT;
9791 	} else if (RAW == '\''){
9792 	    NEXT;
9793 	    version = xmlParseVersionNum(ctxt);
9794 	    if (RAW != '\'') {
9795 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9796 	    } else
9797 	        NEXT;
9798 	} else {
9799 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9800 	}
9801     }
9802     return(version);
9803 }
9804 
9805 /**
9806  * xmlParseEncName:
9807  * @ctxt:  an XML parser context
9808  *
9809  * parse the XML encoding name
9810  *
9811  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9812  *
9813  * Returns the encoding name value or NULL
9814  */
9815 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9816 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9817     xmlChar *buf = NULL;
9818     int len = 0;
9819     int size = 10;
9820     xmlChar cur;
9821 
9822     cur = CUR;
9823     if (((cur >= 'a') && (cur <= 'z')) ||
9824         ((cur >= 'A') && (cur <= 'Z'))) {
9825 	buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9826 	if (buf == NULL) {
9827 	    xmlErrMemory(ctxt, NULL);
9828 	    return(NULL);
9829 	}
9830 
9831 	buf[len++] = cur;
9832 	NEXT;
9833 	cur = CUR;
9834 	while (((cur >= 'a') && (cur <= 'z')) ||
9835 	       ((cur >= 'A') && (cur <= 'Z')) ||
9836 	       ((cur >= '0') && (cur <= '9')) ||
9837 	       (cur == '.') || (cur == '_') ||
9838 	       (cur == '-')) {
9839 	    if (len + 1 >= size) {
9840 	        xmlChar *tmp;
9841 
9842 		size *= 2;
9843 		tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9844 		if (tmp == NULL) {
9845 		    xmlErrMemory(ctxt, NULL);
9846 		    xmlFree(buf);
9847 		    return(NULL);
9848 		}
9849 		buf = tmp;
9850 	    }
9851 	    buf[len++] = cur;
9852 	    NEXT;
9853 	    cur = CUR;
9854 	    if (cur == 0) {
9855 	        SHRINK;
9856 		GROW;
9857 		cur = CUR;
9858 	    }
9859         }
9860 	buf[len] = 0;
9861     } else {
9862 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9863     }
9864     return(buf);
9865 }
9866 
9867 /**
9868  * xmlParseEncodingDecl:
9869  * @ctxt:  an XML parser context
9870  *
9871  * parse the XML encoding declaration
9872  *
9873  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
9874  *
9875  * this setups the conversion filters.
9876  *
9877  * Returns the encoding value or NULL
9878  */
9879 
9880 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9881 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9882     xmlChar *encoding = NULL;
9883 
9884     SKIP_BLANKS;
9885     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9886 	SKIP(8);
9887 	SKIP_BLANKS;
9888 	if (RAW != '=') {
9889 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9890 	    return(NULL);
9891         }
9892 	NEXT;
9893 	SKIP_BLANKS;
9894 	if (RAW == '"') {
9895 	    NEXT;
9896 	    encoding = xmlParseEncName(ctxt);
9897 	    if (RAW != '"') {
9898 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9899 	    } else
9900 	        NEXT;
9901 	} else if (RAW == '\''){
9902 	    NEXT;
9903 	    encoding = xmlParseEncName(ctxt);
9904 	    if (RAW != '\'') {
9905 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9906 	    } else
9907 	        NEXT;
9908 	} else {
9909 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9910 	}
9911 	/*
9912 	 * UTF-16 encoding stwich has already taken place at this stage,
9913 	 * more over the little-endian/big-endian selection is already done
9914 	 */
9915         if ((encoding != NULL) &&
9916 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9917 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9918 	    /*
9919 	     * If no encoding was passed to the parser, that we are
9920 	     * using UTF-16 and no decoder is present i.e. the
9921 	     * document is apparently UTF-8 compatible, then raise an
9922 	     * encoding mismatch fatal error
9923 	     */
9924 	    if ((ctxt->encoding == NULL) &&
9925 	        (ctxt->input->buf != NULL) &&
9926 	        (ctxt->input->buf->encoder == NULL)) {
9927 		xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9928 		  "Document labelled UTF-16 but has UTF-8 content\n");
9929 	    }
9930 	    if (ctxt->encoding != NULL)
9931 		xmlFree((xmlChar *) ctxt->encoding);
9932 	    ctxt->encoding = encoding;
9933 	}
9934 	/*
9935 	 * UTF-8 encoding is handled natively
9936 	 */
9937         else if ((encoding != NULL) &&
9938 	    ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9939 	     (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9940 	    if (ctxt->encoding != NULL)
9941 		xmlFree((xmlChar *) ctxt->encoding);
9942 	    ctxt->encoding = encoding;
9943 	}
9944 	else if (encoding != NULL) {
9945 	    xmlCharEncodingHandlerPtr handler;
9946 
9947 	    if (ctxt->input->encoding != NULL)
9948 		xmlFree((xmlChar *) ctxt->input->encoding);
9949 	    ctxt->input->encoding = encoding;
9950 
9951             handler = xmlFindCharEncodingHandler((const char *) encoding);
9952 	    if (handler != NULL) {
9953 		xmlSwitchToEncoding(ctxt, handler);
9954 	    } else {
9955 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9956 			"Unsupported encoding %s\n", encoding);
9957 		return(NULL);
9958 	    }
9959 	}
9960     }
9961     return(encoding);
9962 }
9963 
9964 /**
9965  * xmlParseSDDecl:
9966  * @ctxt:  an XML parser context
9967  *
9968  * parse the XML standalone declaration
9969  *
9970  * [32] SDDecl ::= S 'standalone' Eq
9971  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9972  *
9973  * [ VC: Standalone Document Declaration ]
9974  * TODO The standalone document declaration must have the value "no"
9975  * if any external markup declarations contain declarations of:
9976  *  - attributes with default values, if elements to which these
9977  *    attributes apply appear in the document without specifications
9978  *    of values for these attributes, or
9979  *  - entities (other than amp, lt, gt, apos, quot), if references
9980  *    to those entities appear in the document, or
9981  *  - attributes with values subject to normalization, where the
9982  *    attribute appears in the document with a value which will change
9983  *    as a result of normalization, or
9984  *  - element types with element content, if white space occurs directly
9985  *    within any instance of those types.
9986  *
9987  * Returns:
9988  *   1 if standalone="yes"
9989  *   0 if standalone="no"
9990  *  -2 if standalone attribute is missing or invalid
9991  *	  (A standalone value of -2 means that the XML declaration was found,
9992  *	   but no value was specified for the standalone attribute).
9993  */
9994 
9995 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)9996 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9997     int standalone = -2;
9998 
9999     SKIP_BLANKS;
10000     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10001 	SKIP(10);
10002         SKIP_BLANKS;
10003 	if (RAW != '=') {
10004 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10005 	    return(standalone);
10006         }
10007 	NEXT;
10008 	SKIP_BLANKS;
10009         if (RAW == '\''){
10010 	    NEXT;
10011 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10012 	        standalone = 0;
10013                 SKIP(2);
10014 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10015 	               (NXT(2) == 's')) {
10016 	        standalone = 1;
10017 		SKIP(3);
10018             } else {
10019 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10020 	    }
10021 	    if (RAW != '\'') {
10022 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10023 	    } else
10024 	        NEXT;
10025 	} else if (RAW == '"'){
10026 	    NEXT;
10027 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10028 	        standalone = 0;
10029 		SKIP(2);
10030 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10031 	               (NXT(2) == 's')) {
10032 	        standalone = 1;
10033                 SKIP(3);
10034             } else {
10035 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10036 	    }
10037 	    if (RAW != '"') {
10038 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10039 	    } else
10040 	        NEXT;
10041 	} else {
10042 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10043         }
10044     }
10045     return(standalone);
10046 }
10047 
10048 /**
10049  * xmlParseXMLDecl:
10050  * @ctxt:  an XML parser context
10051  *
10052  * parse an XML declaration header
10053  *
10054  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10055  */
10056 
10057 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10058 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10059     xmlChar *version;
10060 
10061     /*
10062      * This value for standalone indicates that the document has an
10063      * XML declaration but it does not have a standalone attribute.
10064      * It will be overwritten later if a standalone attribute is found.
10065      */
10066     ctxt->input->standalone = -2;
10067 
10068     /*
10069      * We know that '<?xml' is here.
10070      */
10071     SKIP(5);
10072 
10073     if (!IS_BLANK_CH(RAW)) {
10074 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10075 	               "Blank needed after '<?xml'\n");
10076     }
10077     SKIP_BLANKS;
10078 
10079     /*
10080      * We must have the VersionInfo here.
10081      */
10082     version = xmlParseVersionInfo(ctxt);
10083     if (version == NULL) {
10084 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10085     } else {
10086 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10087 	    /*
10088 	     * Changed here for XML-1.0 5th edition
10089 	     */
10090 	    if (ctxt->options & XML_PARSE_OLD10) {
10091 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10092 			          "Unsupported version '%s'\n",
10093 			          version);
10094 	    } else {
10095 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10096 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10097 		                  "Unsupported version '%s'\n",
10098 				  version, NULL);
10099 		} else {
10100 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10101 				      "Unsupported version '%s'\n",
10102 				      version);
10103 		}
10104 	    }
10105 	}
10106 	if (ctxt->version != NULL)
10107 	    xmlFree((void *) ctxt->version);
10108 	ctxt->version = version;
10109     }
10110 
10111     /*
10112      * We may have the encoding declaration
10113      */
10114     if (!IS_BLANK_CH(RAW)) {
10115         if ((RAW == '?') && (NXT(1) == '>')) {
10116 	    SKIP(2);
10117 	    return;
10118 	}
10119 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10120     }
10121     xmlParseEncodingDecl(ctxt);
10122     if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10123 	/*
10124 	 * The XML REC instructs us to stop parsing right here
10125 	 */
10126         return;
10127     }
10128 
10129     /*
10130      * We may have the standalone status.
10131      */
10132     if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10133         if ((RAW == '?') && (NXT(1) == '>')) {
10134 	    SKIP(2);
10135 	    return;
10136 	}
10137 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10138     }
10139 
10140     /*
10141      * We can grow the input buffer freely at that point
10142      */
10143     GROW;
10144 
10145     SKIP_BLANKS;
10146     ctxt->input->standalone = xmlParseSDDecl(ctxt);
10147 
10148     SKIP_BLANKS;
10149     if ((RAW == '?') && (NXT(1) == '>')) {
10150         SKIP(2);
10151     } else if (RAW == '>') {
10152         /* Deprecated old WD ... */
10153 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10154 	NEXT;
10155     } else {
10156 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10157 	MOVETO_ENDTAG(CUR_PTR);
10158 	NEXT;
10159     }
10160 }
10161 
10162 /**
10163  * xmlParseMisc:
10164  * @ctxt:  an XML parser context
10165  *
10166  * parse an XML Misc* optional field.
10167  *
10168  * [27] Misc ::= Comment | PI |  S
10169  */
10170 
10171 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10172 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10173     while ((ctxt->instate != XML_PARSER_EOF) &&
10174            (((RAW == '<') && (NXT(1) == '?')) ||
10175             (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10176             IS_BLANK_CH(CUR))) {
10177         if ((RAW == '<') && (NXT(1) == '?')) {
10178 	    xmlParsePI(ctxt);
10179 	} else if (IS_BLANK_CH(CUR)) {
10180 	    NEXT;
10181 	} else
10182 	    xmlParseComment(ctxt);
10183     }
10184 }
10185 
10186 /**
10187  * xmlParseDocument:
10188  * @ctxt:  an XML parser context
10189  *
10190  * parse an XML document (and build a tree if using the standard SAX
10191  * interface).
10192  *
10193  * [1] document ::= prolog element Misc*
10194  *
10195  * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10196  *
10197  * Returns 0, -1 in case of error. the parser context is augmented
10198  *                as a result of the parsing.
10199  */
10200 
10201 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10202 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10203     xmlChar start[4];
10204     xmlCharEncoding enc;
10205 
10206     xmlInitParser();
10207 
10208     if ((ctxt == NULL) || (ctxt->input == NULL))
10209         return(-1);
10210 
10211     GROW;
10212 
10213     /*
10214      * SAX: detecting the level.
10215      */
10216     xmlDetectSAX2(ctxt);
10217 
10218     /*
10219      * SAX: beginning of the document processing.
10220      */
10221     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10222         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10223     if (ctxt->instate == XML_PARSER_EOF)
10224 	return(-1);
10225 
10226     if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10227         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10228 	/*
10229 	 * Get the 4 first bytes and decode the charset
10230 	 * if enc != XML_CHAR_ENCODING_NONE
10231 	 * plug some encoding conversion routines.
10232 	 */
10233 	start[0] = RAW;
10234 	start[1] = NXT(1);
10235 	start[2] = NXT(2);
10236 	start[3] = NXT(3);
10237 	enc = xmlDetectCharEncoding(&start[0], 4);
10238 	if (enc != XML_CHAR_ENCODING_NONE) {
10239 	    xmlSwitchEncoding(ctxt, enc);
10240 	}
10241     }
10242 
10243 
10244     if (CUR == 0) {
10245 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10246     }
10247 
10248     /*
10249      * Check for the XMLDecl in the Prolog.
10250      * do not GROW here to avoid the detected encoder to decode more
10251      * than just the first line, unless the amount of data is really
10252      * too small to hold "<?xml version="1.0" encoding="foo"
10253      */
10254     if ((ctxt->input->end - ctxt->input->cur) < 35) {
10255        GROW;
10256     }
10257     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10258 
10259 	/*
10260 	 * Note that we will switch encoding on the fly.
10261 	 */
10262 	xmlParseXMLDecl(ctxt);
10263 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10264 	    /*
10265 	     * The XML REC instructs us to stop parsing right here
10266 	     */
10267 	    return(-1);
10268 	}
10269 	ctxt->standalone = ctxt->input->standalone;
10270 	SKIP_BLANKS;
10271     } else {
10272 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10273     }
10274     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10275         ctxt->sax->startDocument(ctxt->userData);
10276     if (ctxt->instate == XML_PARSER_EOF)
10277 	return(-1);
10278 
10279     /*
10280      * The Misc part of the Prolog
10281      */
10282     GROW;
10283     xmlParseMisc(ctxt);
10284 
10285     /*
10286      * Then possibly doc type declaration(s) and more Misc
10287      * (doctypedecl Misc*)?
10288      */
10289     GROW;
10290     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10291 
10292 	ctxt->inSubset = 1;
10293 	xmlParseDocTypeDecl(ctxt);
10294 	if (RAW == '[') {
10295 	    ctxt->instate = XML_PARSER_DTD;
10296 	    xmlParseInternalSubset(ctxt);
10297 	    if (ctxt->instate == XML_PARSER_EOF)
10298 		return(-1);
10299 	}
10300 
10301 	/*
10302 	 * Create and update the external subset.
10303 	 */
10304 	ctxt->inSubset = 2;
10305 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10306 	    (!ctxt->disableSAX))
10307 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10308 	                              ctxt->extSubSystem, ctxt->extSubURI);
10309 	if (ctxt->instate == XML_PARSER_EOF)
10310 	    return(-1);
10311 	ctxt->inSubset = 0;
10312 
10313         xmlCleanSpecialAttr(ctxt);
10314 
10315 	ctxt->instate = XML_PARSER_PROLOG;
10316 	xmlParseMisc(ctxt);
10317     }
10318 
10319     /*
10320      * Time to start parsing the tree itself
10321      */
10322     GROW;
10323     if (RAW != '<') {
10324 	xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10325 		       "Start tag expected, '<' not found\n");
10326     } else {
10327 	ctxt->instate = XML_PARSER_CONTENT;
10328 	xmlParseElement(ctxt);
10329 	ctxt->instate = XML_PARSER_EPILOG;
10330 
10331 
10332 	/*
10333 	 * The Misc part at the end
10334 	 */
10335 	xmlParseMisc(ctxt);
10336 
10337 	if (RAW != 0) {
10338 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10339 	}
10340 	ctxt->instate = XML_PARSER_EOF;
10341     }
10342 
10343     /*
10344      * SAX: end of the document processing.
10345      */
10346     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10347         ctxt->sax->endDocument(ctxt->userData);
10348 
10349     /*
10350      * Remove locally kept entity definitions if the tree was not built
10351      */
10352     if ((ctxt->myDoc != NULL) &&
10353 	(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10354 	xmlFreeDoc(ctxt->myDoc);
10355 	ctxt->myDoc = NULL;
10356     }
10357 
10358     if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10359         ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10360 	if (ctxt->valid)
10361 	    ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10362 	if (ctxt->nsWellFormed)
10363 	    ctxt->myDoc->properties |= XML_DOC_NSVALID;
10364 	if (ctxt->options & XML_PARSE_OLD10)
10365 	    ctxt->myDoc->properties |= XML_DOC_OLD10;
10366     }
10367     if (! ctxt->wellFormed) {
10368 	ctxt->valid = 0;
10369 	return(-1);
10370     }
10371     return(0);
10372 }
10373 
10374 /**
10375  * xmlParseExtParsedEnt:
10376  * @ctxt:  an XML parser context
10377  *
10378  * parse a general parsed entity
10379  * An external general parsed entity is well-formed if it matches the
10380  * production labeled extParsedEnt.
10381  *
10382  * [78] extParsedEnt ::= TextDecl? content
10383  *
10384  * Returns 0, -1 in case of error. the parser context is augmented
10385  *                as a result of the parsing.
10386  */
10387 
10388 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10389 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10390     xmlChar start[4];
10391     xmlCharEncoding enc;
10392 
10393     if ((ctxt == NULL) || (ctxt->input == NULL))
10394         return(-1);
10395 
10396     xmlDefaultSAXHandlerInit();
10397 
10398     xmlDetectSAX2(ctxt);
10399 
10400     GROW;
10401 
10402     /*
10403      * SAX: beginning of the document processing.
10404      */
10405     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10406         ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10407 
10408     /*
10409      * Get the 4 first bytes and decode the charset
10410      * if enc != XML_CHAR_ENCODING_NONE
10411      * plug some encoding conversion routines.
10412      */
10413     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10414 	start[0] = RAW;
10415 	start[1] = NXT(1);
10416 	start[2] = NXT(2);
10417 	start[3] = NXT(3);
10418 	enc = xmlDetectCharEncoding(start, 4);
10419 	if (enc != XML_CHAR_ENCODING_NONE) {
10420 	    xmlSwitchEncoding(ctxt, enc);
10421 	}
10422     }
10423 
10424 
10425     if (CUR == 0) {
10426 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10427     }
10428 
10429     /*
10430      * Check for the XMLDecl in the Prolog.
10431      */
10432     GROW;
10433     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10434 
10435 	/*
10436 	 * Note that we will switch encoding on the fly.
10437 	 */
10438 	xmlParseXMLDecl(ctxt);
10439 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10440 	    /*
10441 	     * The XML REC instructs us to stop parsing right here
10442 	     */
10443 	    return(-1);
10444 	}
10445 	SKIP_BLANKS;
10446     } else {
10447 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10448     }
10449     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10450         ctxt->sax->startDocument(ctxt->userData);
10451     if (ctxt->instate == XML_PARSER_EOF)
10452 	return(-1);
10453 
10454     /*
10455      * Doing validity checking on chunk doesn't make sense
10456      */
10457     ctxt->instate = XML_PARSER_CONTENT;
10458     ctxt->validate = 0;
10459     ctxt->loadsubset = 0;
10460     ctxt->depth = 0;
10461 
10462     xmlParseContent(ctxt);
10463     if (ctxt->instate == XML_PARSER_EOF)
10464 	return(-1);
10465 
10466     if ((RAW == '<') && (NXT(1) == '/')) {
10467 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10468     } else if (RAW != 0) {
10469 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10470     }
10471 
10472     /*
10473      * SAX: end of the document processing.
10474      */
10475     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10476         ctxt->sax->endDocument(ctxt->userData);
10477 
10478     if (! ctxt->wellFormed) return(-1);
10479     return(0);
10480 }
10481 
10482 #ifdef LIBXML_PUSH_ENABLED
10483 /************************************************************************
10484  *									*
10485  * 		Progressive parsing interfaces				*
10486  *									*
10487  ************************************************************************/
10488 
10489 /**
10490  * xmlParseLookupSequence:
10491  * @ctxt:  an XML parser context
10492  * @first:  the first char to lookup
10493  * @next:  the next char to lookup or zero
10494  * @third:  the next char to lookup or zero
10495  *
10496  * Try to find if a sequence (first, next, third) or  just (first next) or
10497  * (first) is available in the input stream.
10498  * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499  * to avoid rescanning sequences of bytes, it DOES change the state of the
10500  * parser, do not use liberally.
10501  *
10502  * Returns the index to the current parsing point if the full sequence
10503  *      is available, -1 otherwise.
10504  */
10505 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10506 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507                        xmlChar next, xmlChar third) {
10508     int base, len;
10509     xmlParserInputPtr in;
10510     const xmlChar *buf;
10511 
10512     in = ctxt->input;
10513     if (in == NULL) return(-1);
10514     base = in->cur - in->base;
10515     if (base < 0) return(-1);
10516     if (ctxt->checkIndex > base)
10517         base = ctxt->checkIndex;
10518     if (in->buf == NULL) {
10519 	buf = in->base;
10520 	len = in->length;
10521     } else {
10522 	buf = in->buf->buffer->content;
10523 	len = in->buf->buffer->use;
10524     }
10525     /* take into account the sequence length */
10526     if (third) len -= 2;
10527     else if (next) len --;
10528     for (;base < len;base++) {
10529         if (buf[base] == first) {
10530 	    if (third != 0) {
10531 		if ((buf[base + 1] != next) ||
10532 		    (buf[base + 2] != third)) continue;
10533 	    } else if (next != 0) {
10534 		if (buf[base + 1] != next) continue;
10535 	    }
10536 	    ctxt->checkIndex = 0;
10537 #ifdef DEBUG_PUSH
10538 	    if (next == 0)
10539 		xmlGenericError(xmlGenericErrorContext,
10540 			"PP: lookup '%c' found at %d\n",
10541 			first, base);
10542 	    else if (third == 0)
10543 		xmlGenericError(xmlGenericErrorContext,
10544 			"PP: lookup '%c%c' found at %d\n",
10545 			first, next, base);
10546 	    else
10547 		xmlGenericError(xmlGenericErrorContext,
10548 			"PP: lookup '%c%c%c' found at %d\n",
10549 			first, next, third, base);
10550 #endif
10551 	    return(base - (in->cur - in->base));
10552 	}
10553     }
10554     ctxt->checkIndex = base;
10555 #ifdef DEBUG_PUSH
10556     if (next == 0)
10557 	xmlGenericError(xmlGenericErrorContext,
10558 		"PP: lookup '%c' failed\n", first);
10559     else if (third == 0)
10560 	xmlGenericError(xmlGenericErrorContext,
10561 		"PP: lookup '%c%c' failed\n", first, next);
10562     else
10563 	xmlGenericError(xmlGenericErrorContext,
10564 		"PP: lookup '%c%c%c' failed\n", first, next, third);
10565 #endif
10566     return(-1);
10567 }
10568 
10569 /**
10570  * xmlParseGetLasts:
10571  * @ctxt:  an XML parser context
10572  * @lastlt:  pointer to store the last '<' from the input
10573  * @lastgt:  pointer to store the last '>' from the input
10574  *
10575  * Lookup the last < and > in the current chunk
10576  */
10577 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10578 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579                  const xmlChar **lastgt) {
10580     const xmlChar *tmp;
10581 
10582     if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583 	xmlGenericError(xmlGenericErrorContext,
10584 		    "Internal error: xmlParseGetLasts\n");
10585 	return;
10586     }
10587     if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10588         tmp = ctxt->input->end;
10589 	tmp--;
10590 	while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10591 	if (tmp < ctxt->input->base) {
10592 	    *lastlt = NULL;
10593 	    *lastgt = NULL;
10594 	} else {
10595 	    *lastlt = tmp;
10596 	    tmp++;
10597 	    while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598 	        if (*tmp == '\'') {
10599 		    tmp++;
10600 		    while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601 		    if (tmp < ctxt->input->end) tmp++;
10602 		} else if (*tmp == '"') {
10603 		    tmp++;
10604 		    while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605 		    if (tmp < ctxt->input->end) tmp++;
10606 		} else
10607 		    tmp++;
10608 	    }
10609 	    if (tmp < ctxt->input->end)
10610 	        *lastgt = tmp;
10611 	    else {
10612 	        tmp = *lastlt;
10613 		tmp--;
10614 		while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615 		if (tmp >= ctxt->input->base)
10616 		    *lastgt = tmp;
10617 		else
10618 		    *lastgt = NULL;
10619 	    }
10620 	}
10621     } else {
10622         *lastlt = NULL;
10623 	*lastgt = NULL;
10624     }
10625 }
10626 /**
10627  * xmlCheckCdataPush:
10628  * @cur: pointer to the bock of characters
10629  * @len: length of the block in bytes
10630  *
10631  * Check that the block of characters is okay as SCdata content [20]
10632  *
10633  * Returns the number of bytes to pass if okay, a negative index where an
10634  *         UTF-8 error occured otherwise
10635  */
10636 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10637 xmlCheckCdataPush(const xmlChar *utf, int len) {
10638     int ix;
10639     unsigned char c;
10640     int codepoint;
10641 
10642     if ((utf == NULL) || (len <= 0))
10643         return(0);
10644 
10645     for (ix = 0; ix < len;) {      /* string is 0-terminated */
10646         c = utf[ix];
10647         if ((c & 0x80) == 0x00) {	/* 1-byte code, starts with 10 */
10648 	    if (c >= 0x20)
10649 		ix++;
10650 	    else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10651 	        ix++;
10652 	    else
10653 	        return(-ix);
10654 	} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655 	    if (ix + 2 > len) return(ix);
10656 	    if ((utf[ix+1] & 0xc0 ) != 0x80)
10657 	        return(-ix);
10658 	    codepoint = (utf[ix] & 0x1f) << 6;
10659 	    codepoint |= utf[ix+1] & 0x3f;
10660 	    if (!xmlIsCharQ(codepoint))
10661 	        return(-ix);
10662 	    ix += 2;
10663 	} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664 	    if (ix + 3 > len) return(ix);
10665 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10666 	        ((utf[ix+2] & 0xc0) != 0x80))
10667 		    return(-ix);
10668 	    codepoint = (utf[ix] & 0xf) << 12;
10669 	    codepoint |= (utf[ix+1] & 0x3f) << 6;
10670 	    codepoint |= utf[ix+2] & 0x3f;
10671 	    if (!xmlIsCharQ(codepoint))
10672 	        return(-ix);
10673 	    ix += 3;
10674 	} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675 	    if (ix + 4 > len) return(ix);
10676 	    if (((utf[ix+1] & 0xc0) != 0x80) ||
10677 	        ((utf[ix+2] & 0xc0) != 0x80) ||
10678 		((utf[ix+3] & 0xc0) != 0x80))
10679 		    return(-ix);
10680 	    codepoint = (utf[ix] & 0x7) << 18;
10681 	    codepoint |= (utf[ix+1] & 0x3f) << 12;
10682 	    codepoint |= (utf[ix+2] & 0x3f) << 6;
10683 	    codepoint |= utf[ix+3] & 0x3f;
10684 	    if (!xmlIsCharQ(codepoint))
10685 	        return(-ix);
10686 	    ix += 4;
10687 	} else				/* unknown encoding */
10688 	    return(-ix);
10689       }
10690       return(ix);
10691 }
10692 
10693 /**
10694  * xmlParseTryOrFinish:
10695  * @ctxt:  an XML parser context
10696  * @terminate:  last chunk indicator
10697  *
10698  * Try to progress on parsing
10699  *
10700  * Returns zero if no parsing was possible
10701  */
10702 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10703 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10704     int ret = 0;
10705     int avail, tlen;
10706     xmlChar cur, next;
10707     const xmlChar *lastlt, *lastgt;
10708 
10709     if (ctxt->input == NULL)
10710         return(0);
10711 
10712 #ifdef DEBUG_PUSH
10713     switch (ctxt->instate) {
10714 	case XML_PARSER_EOF:
10715 	    xmlGenericError(xmlGenericErrorContext,
10716 		    "PP: try EOF\n"); break;
10717 	case XML_PARSER_START:
10718 	    xmlGenericError(xmlGenericErrorContext,
10719 		    "PP: try START\n"); break;
10720 	case XML_PARSER_MISC:
10721 	    xmlGenericError(xmlGenericErrorContext,
10722 		    "PP: try MISC\n");break;
10723 	case XML_PARSER_COMMENT:
10724 	    xmlGenericError(xmlGenericErrorContext,
10725 		    "PP: try COMMENT\n");break;
10726 	case XML_PARSER_PROLOG:
10727 	    xmlGenericError(xmlGenericErrorContext,
10728 		    "PP: try PROLOG\n");break;
10729 	case XML_PARSER_START_TAG:
10730 	    xmlGenericError(xmlGenericErrorContext,
10731 		    "PP: try START_TAG\n");break;
10732 	case XML_PARSER_CONTENT:
10733 	    xmlGenericError(xmlGenericErrorContext,
10734 		    "PP: try CONTENT\n");break;
10735 	case XML_PARSER_CDATA_SECTION:
10736 	    xmlGenericError(xmlGenericErrorContext,
10737 		    "PP: try CDATA_SECTION\n");break;
10738 	case XML_PARSER_END_TAG:
10739 	    xmlGenericError(xmlGenericErrorContext,
10740 		    "PP: try END_TAG\n");break;
10741 	case XML_PARSER_ENTITY_DECL:
10742 	    xmlGenericError(xmlGenericErrorContext,
10743 		    "PP: try ENTITY_DECL\n");break;
10744 	case XML_PARSER_ENTITY_VALUE:
10745 	    xmlGenericError(xmlGenericErrorContext,
10746 		    "PP: try ENTITY_VALUE\n");break;
10747 	case XML_PARSER_ATTRIBUTE_VALUE:
10748 	    xmlGenericError(xmlGenericErrorContext,
10749 		    "PP: try ATTRIBUTE_VALUE\n");break;
10750 	case XML_PARSER_DTD:
10751 	    xmlGenericError(xmlGenericErrorContext,
10752 		    "PP: try DTD\n");break;
10753 	case XML_PARSER_EPILOG:
10754 	    xmlGenericError(xmlGenericErrorContext,
10755 		    "PP: try EPILOG\n");break;
10756 	case XML_PARSER_PI:
10757 	    xmlGenericError(xmlGenericErrorContext,
10758 		    "PP: try PI\n");break;
10759         case XML_PARSER_IGNORE:
10760             xmlGenericError(xmlGenericErrorContext,
10761 		    "PP: try IGNORE\n");break;
10762     }
10763 #endif
10764 
10765     if ((ctxt->input != NULL) &&
10766         (ctxt->input->cur - ctxt->input->base > 4096)) {
10767 	xmlSHRINK(ctxt);
10768 	ctxt->checkIndex = 0;
10769     }
10770     xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10771 
10772     while (ctxt->instate != XML_PARSER_EOF) {
10773 	if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10774 	    return(0);
10775 
10776 
10777 	/*
10778 	 * Pop-up of finished entities.
10779 	 */
10780 	while ((RAW == 0) && (ctxt->inputNr > 1))
10781 	    xmlPopInput(ctxt);
10782 
10783 	if (ctxt->input == NULL) break;
10784 	if (ctxt->input->buf == NULL)
10785 	    avail = ctxt->input->length -
10786 	            (ctxt->input->cur - ctxt->input->base);
10787 	else {
10788 	    /*
10789 	     * If we are operating on converted input, try to flush
10790 	     * remainng chars to avoid them stalling in the non-converted
10791 	     * buffer.
10792 	     */
10793 	    if ((ctxt->input->buf->raw != NULL) &&
10794 		(ctxt->input->buf->raw->use > 0)) {
10795 		int base = ctxt->input->base -
10796 		           ctxt->input->buf->buffer->content;
10797 		int current = ctxt->input->cur - ctxt->input->base;
10798 
10799 		xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800 		ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801 		ctxt->input->cur = ctxt->input->base + current;
10802 		ctxt->input->end =
10803 		    &ctxt->input->buf->buffer->content[
10804 		                       ctxt->input->buf->buffer->use];
10805 	    }
10806 	    avail = ctxt->input->buf->buffer->use -
10807 		    (ctxt->input->cur - ctxt->input->base);
10808 	}
10809         if (avail < 1)
10810 	    goto done;
10811         switch (ctxt->instate) {
10812             case XML_PARSER_EOF:
10813 	        /*
10814 		 * Document parsing is done !
10815 		 */
10816 	        goto done;
10817             case XML_PARSER_START:
10818 		if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10819 		    xmlChar start[4];
10820 		    xmlCharEncoding enc;
10821 
10822 		    /*
10823 		     * Very first chars read from the document flow.
10824 		     */
10825 		    if (avail < 4)
10826 			goto done;
10827 
10828 		    /*
10829 		     * Get the 4 first bytes and decode the charset
10830 		     * if enc != XML_CHAR_ENCODING_NONE
10831 		     * plug some encoding conversion routines,
10832 		     * else xmlSwitchEncoding will set to (default)
10833 		     * UTF8.
10834 		     */
10835 		    start[0] = RAW;
10836 		    start[1] = NXT(1);
10837 		    start[2] = NXT(2);
10838 		    start[3] = NXT(3);
10839 		    enc = xmlDetectCharEncoding(start, 4);
10840 		    xmlSwitchEncoding(ctxt, enc);
10841 		    break;
10842 		}
10843 
10844 		if (avail < 2)
10845 		    goto done;
10846 		cur = ctxt->input->cur[0];
10847 		next = ctxt->input->cur[1];
10848 		if (cur == 0) {
10849 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850 			ctxt->sax->setDocumentLocator(ctxt->userData,
10851 						      &xmlDefaultSAXLocator);
10852 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10853 		    ctxt->instate = XML_PARSER_EOF;
10854 #ifdef DEBUG_PUSH
10855 		    xmlGenericError(xmlGenericErrorContext,
10856 			    "PP: entering EOF\n");
10857 #endif
10858 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 			ctxt->sax->endDocument(ctxt->userData);
10860 		    goto done;
10861 		}
10862 	        if ((cur == '<') && (next == '?')) {
10863 		    /* PI or XML decl */
10864 		    if (avail < 5) return(ret);
10865 		    if ((!terminate) &&
10866 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10867 			return(ret);
10868 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869 			ctxt->sax->setDocumentLocator(ctxt->userData,
10870 						      &xmlDefaultSAXLocator);
10871 		    if ((ctxt->input->cur[2] == 'x') &&
10872 			(ctxt->input->cur[3] == 'm') &&
10873 			(ctxt->input->cur[4] == 'l') &&
10874 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
10875 			ret += 5;
10876 #ifdef DEBUG_PUSH
10877 			xmlGenericError(xmlGenericErrorContext,
10878 				"PP: Parsing XML Decl\n");
10879 #endif
10880 			xmlParseXMLDecl(ctxt);
10881 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10882 			    /*
10883 			     * The XML REC instructs us to stop parsing right
10884 			     * here
10885 			     */
10886 			    ctxt->instate = XML_PARSER_EOF;
10887 			    return(0);
10888 			}
10889 			ctxt->standalone = ctxt->input->standalone;
10890 			if ((ctxt->encoding == NULL) &&
10891 			    (ctxt->input->encoding != NULL))
10892 			    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894 			    (!ctxt->disableSAX))
10895 			    ctxt->sax->startDocument(ctxt->userData);
10896 			ctxt->instate = XML_PARSER_MISC;
10897 #ifdef DEBUG_PUSH
10898 			xmlGenericError(xmlGenericErrorContext,
10899 				"PP: entering MISC\n");
10900 #endif
10901 		    } else {
10902 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903 			if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904 			    (!ctxt->disableSAX))
10905 			    ctxt->sax->startDocument(ctxt->userData);
10906 			ctxt->instate = XML_PARSER_MISC;
10907 #ifdef DEBUG_PUSH
10908 			xmlGenericError(xmlGenericErrorContext,
10909 				"PP: entering MISC\n");
10910 #endif
10911 		    }
10912 		} else {
10913 		    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914 			ctxt->sax->setDocumentLocator(ctxt->userData,
10915 						      &xmlDefaultSAXLocator);
10916 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10917 		    if (ctxt->version == NULL) {
10918 		        xmlErrMemory(ctxt, NULL);
10919 			break;
10920 		    }
10921 		    if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922 		        (!ctxt->disableSAX))
10923 			ctxt->sax->startDocument(ctxt->userData);
10924 		    ctxt->instate = XML_PARSER_MISC;
10925 #ifdef DEBUG_PUSH
10926 		    xmlGenericError(xmlGenericErrorContext,
10927 			    "PP: entering MISC\n");
10928 #endif
10929 		}
10930 		break;
10931             case XML_PARSER_START_TAG: {
10932 	        const xmlChar *name;
10933 		const xmlChar *prefix = NULL;
10934 		const xmlChar *URI = NULL;
10935 		int nsNr = ctxt->nsNr;
10936 
10937 		if ((avail < 2) && (ctxt->inputNr == 1))
10938 		    goto done;
10939 		cur = ctxt->input->cur[0];
10940 	        if (cur != '<') {
10941 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10942 		    ctxt->instate = XML_PARSER_EOF;
10943 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944 			ctxt->sax->endDocument(ctxt->userData);
10945 		    goto done;
10946 		}
10947 		if (!terminate) {
10948 		    if (ctxt->progressive) {
10949 		        /* > can be found unescaped in attribute values */
10950 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10951 			    goto done;
10952 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10953 			goto done;
10954 		    }
10955 		}
10956 		if (ctxt->spaceNr == 0)
10957 		    spacePush(ctxt, -1);
10958 		else if (*ctxt->space == -2)
10959 		    spacePush(ctxt, -1);
10960 		else
10961 		    spacePush(ctxt, *ctxt->space);
10962 #ifdef LIBXML_SAX1_ENABLED
10963 		if (ctxt->sax2)
10964 #endif /* LIBXML_SAX1_ENABLED */
10965 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10966 #ifdef LIBXML_SAX1_ENABLED
10967 		else
10968 		    name = xmlParseStartTag(ctxt);
10969 #endif /* LIBXML_SAX1_ENABLED */
10970 		if (ctxt->instate == XML_PARSER_EOF)
10971 		    goto done;
10972 		if (name == NULL) {
10973 		    spacePop(ctxt);
10974 		    ctxt->instate = XML_PARSER_EOF;
10975 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10976 			ctxt->sax->endDocument(ctxt->userData);
10977 		    goto done;
10978 		}
10979 #ifdef LIBXML_VALID_ENABLED
10980 		/*
10981 		 * [ VC: Root Element Type ]
10982 		 * The Name in the document type declaration must match
10983 		 * the element type of the root element.
10984 		 */
10985 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10986 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
10987 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10988 #endif /* LIBXML_VALID_ENABLED */
10989 
10990 		/*
10991 		 * Check for an Empty Element.
10992 		 */
10993 		if ((RAW == '/') && (NXT(1) == '>')) {
10994 		    SKIP(2);
10995 
10996 		    if (ctxt->sax2) {
10997 			if ((ctxt->sax != NULL) &&
10998 			    (ctxt->sax->endElementNs != NULL) &&
10999 			    (!ctxt->disableSAX))
11000 			    ctxt->sax->endElementNs(ctxt->userData, name,
11001 			                            prefix, URI);
11002 			if (ctxt->nsNr - nsNr > 0)
11003 			    nsPop(ctxt, ctxt->nsNr - nsNr);
11004 #ifdef LIBXML_SAX1_ENABLED
11005 		    } else {
11006 			if ((ctxt->sax != NULL) &&
11007 			    (ctxt->sax->endElement != NULL) &&
11008 			    (!ctxt->disableSAX))
11009 			    ctxt->sax->endElement(ctxt->userData, name);
11010 #endif /* LIBXML_SAX1_ENABLED */
11011 		    }
11012 		    if (ctxt->instate == XML_PARSER_EOF)
11013 			goto done;
11014 		    spacePop(ctxt);
11015 		    if (ctxt->nameNr == 0) {
11016 			ctxt->instate = XML_PARSER_EPILOG;
11017 		    } else {
11018 			ctxt->instate = XML_PARSER_CONTENT;
11019 		    }
11020 		    break;
11021 		}
11022 		if (RAW == '>') {
11023 		    NEXT;
11024 		} else {
11025 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11026 					 "Couldn't find end of Start Tag %s\n",
11027 					 name);
11028 		    nodePop(ctxt);
11029 		    spacePop(ctxt);
11030 		}
11031 		if (ctxt->sax2)
11032 		    nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11033 #ifdef LIBXML_SAX1_ENABLED
11034 		else
11035 		    namePush(ctxt, name);
11036 #endif /* LIBXML_SAX1_ENABLED */
11037 
11038 		ctxt->instate = XML_PARSER_CONTENT;
11039                 break;
11040 	    }
11041             case XML_PARSER_CONTENT: {
11042 		const xmlChar *test;
11043 		unsigned int cons;
11044 		if ((avail < 2) && (ctxt->inputNr == 1))
11045 		    goto done;
11046 		cur = ctxt->input->cur[0];
11047 		next = ctxt->input->cur[1];
11048 
11049 		test = CUR_PTR;
11050 	        cons = ctxt->input->consumed;
11051 		if ((cur == '<') && (next == '/')) {
11052 		    ctxt->instate = XML_PARSER_END_TAG;
11053 		    break;
11054 	        } else if ((cur == '<') && (next == '?')) {
11055 		    if ((!terminate) &&
11056 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11057 			goto done;
11058 		    xmlParsePI(ctxt);
11059 		} else if ((cur == '<') && (next != '!')) {
11060 		    ctxt->instate = XML_PARSER_START_TAG;
11061 		    break;
11062 		} else if ((cur == '<') && (next == '!') &&
11063 		           (ctxt->input->cur[2] == '-') &&
11064 			   (ctxt->input->cur[3] == '-')) {
11065 		    int term;
11066 
11067 	            if (avail < 4)
11068 		        goto done;
11069 		    ctxt->input->cur += 4;
11070 		    term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11071 		    ctxt->input->cur -= 4;
11072 		    if ((!terminate) && (term < 0))
11073 			goto done;
11074 		    xmlParseComment(ctxt);
11075 		    ctxt->instate = XML_PARSER_CONTENT;
11076 		} else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11077 		    (ctxt->input->cur[2] == '[') &&
11078 		    (ctxt->input->cur[3] == 'C') &&
11079 		    (ctxt->input->cur[4] == 'D') &&
11080 		    (ctxt->input->cur[5] == 'A') &&
11081 		    (ctxt->input->cur[6] == 'T') &&
11082 		    (ctxt->input->cur[7] == 'A') &&
11083 		    (ctxt->input->cur[8] == '[')) {
11084 		    SKIP(9);
11085 		    ctxt->instate = XML_PARSER_CDATA_SECTION;
11086 		    break;
11087 		} else if ((cur == '<') && (next == '!') &&
11088 		           (avail < 9)) {
11089 		    goto done;
11090 		} else if (cur == '&') {
11091 		    if ((!terminate) &&
11092 		        (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11093 			goto done;
11094 		    xmlParseReference(ctxt);
11095 		} else {
11096 		    /* TODO Avoid the extra copy, handle directly !!! */
11097 		    /*
11098 		     * Goal of the following test is:
11099 		     *  - minimize calls to the SAX 'character' callback
11100 		     *    when they are mergeable
11101 		     *  - handle an problem for isBlank when we only parse
11102 		     *    a sequence of blank chars and the next one is
11103 		     *    not available to check against '<' presence.
11104 		     *  - tries to homogenize the differences in SAX
11105 		     *    callbacks between the push and pull versions
11106 		     *    of the parser.
11107 		     */
11108 		    if ((ctxt->inputNr == 1) &&
11109 		        (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11110 			if (!terminate) {
11111 			    if (ctxt->progressive) {
11112 				if ((lastlt == NULL) ||
11113 				    (ctxt->input->cur > lastlt))
11114 				    goto done;
11115 			    } else if (xmlParseLookupSequence(ctxt,
11116 			                                      '<', 0, 0) < 0) {
11117 				goto done;
11118 			    }
11119 			}
11120                     }
11121 		    ctxt->checkIndex = 0;
11122 		    xmlParseCharData(ctxt, 0);
11123 		}
11124 		/*
11125 		 * Pop-up of finished entities.
11126 		 */
11127 		while ((RAW == 0) && (ctxt->inputNr > 1))
11128 		    xmlPopInput(ctxt);
11129 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11130 		    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11131 		                "detected an error in element content\n");
11132 		    ctxt->instate = XML_PARSER_EOF;
11133 		    break;
11134 		}
11135 		break;
11136 	    }
11137             case XML_PARSER_END_TAG:
11138 		if (avail < 2)
11139 		    goto done;
11140 		if (!terminate) {
11141 		    if (ctxt->progressive) {
11142 		        /* > can be found unescaped in attribute values */
11143 		        if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11144 			    goto done;
11145 		    } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11146 			goto done;
11147 		    }
11148 		}
11149 		if (ctxt->sax2) {
11150 		    xmlParseEndTag2(ctxt,
11151 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11152 		           (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11153 		       (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11154 		    nameNsPop(ctxt);
11155 		}
11156 #ifdef LIBXML_SAX1_ENABLED
11157 		  else
11158 		    xmlParseEndTag1(ctxt, 0);
11159 #endif /* LIBXML_SAX1_ENABLED */
11160 		if (ctxt->instate == XML_PARSER_EOF) {
11161 		    /* Nothing */
11162 		} else if (ctxt->nameNr == 0) {
11163 		    ctxt->instate = XML_PARSER_EPILOG;
11164 		} else {
11165 		    ctxt->instate = XML_PARSER_CONTENT;
11166 		}
11167 		break;
11168             case XML_PARSER_CDATA_SECTION: {
11169 	        /*
11170 		 * The Push mode need to have the SAX callback for
11171 		 * cdataBlock merge back contiguous callbacks.
11172 		 */
11173 		int base;
11174 
11175 		base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11176 		if (base < 0) {
11177 		    if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11178 		        int tmp;
11179 
11180 			tmp = xmlCheckCdataPush(ctxt->input->cur,
11181 			                        XML_PARSER_BIG_BUFFER_SIZE);
11182 			if (tmp < 0) {
11183 			    tmp = -tmp;
11184 			    ctxt->input->cur += tmp;
11185 			    goto encoding_error;
11186 			}
11187 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11188 			    if (ctxt->sax->cdataBlock != NULL)
11189 				ctxt->sax->cdataBlock(ctxt->userData,
11190 				                      ctxt->input->cur, tmp);
11191 			    else if (ctxt->sax->characters != NULL)
11192 				ctxt->sax->characters(ctxt->userData,
11193 				                      ctxt->input->cur, tmp);
11194 			}
11195 			if (ctxt->instate == XML_PARSER_EOF)
11196 			    goto done;
11197 			SKIPL(tmp);
11198 			ctxt->checkIndex = 0;
11199 		    }
11200 		    goto done;
11201 		} else {
11202 		    int tmp;
11203 
11204 		    tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11205 		    if ((tmp < 0) || (tmp != base)) {
11206 			tmp = -tmp;
11207 			ctxt->input->cur += tmp;
11208 			goto encoding_error;
11209 		    }
11210 		    if ((ctxt->sax != NULL) && (base == 0) &&
11211 		        (ctxt->sax->cdataBlock != NULL) &&
11212 		        (!ctxt->disableSAX)) {
11213 			/*
11214 			 * Special case to provide identical behaviour
11215 			 * between pull and push parsers on enpty CDATA
11216 			 * sections
11217 			 */
11218 			 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11219 			     (!strncmp((const char *)&ctxt->input->cur[-9],
11220 			               "<![CDATA[", 9)))
11221 			     ctxt->sax->cdataBlock(ctxt->userData,
11222 			                           BAD_CAST "", 0);
11223 		    } else if ((ctxt->sax != NULL) && (base > 0) &&
11224 			(!ctxt->disableSAX)) {
11225 			if (ctxt->sax->cdataBlock != NULL)
11226 			    ctxt->sax->cdataBlock(ctxt->userData,
11227 						  ctxt->input->cur, base);
11228 			else if (ctxt->sax->characters != NULL)
11229 			    ctxt->sax->characters(ctxt->userData,
11230 						  ctxt->input->cur, base);
11231 		    }
11232 		    if (ctxt->instate == XML_PARSER_EOF)
11233 			goto done;
11234 		    SKIPL(base + 3);
11235 		    ctxt->checkIndex = 0;
11236 		    ctxt->instate = XML_PARSER_CONTENT;
11237 #ifdef DEBUG_PUSH
11238 		    xmlGenericError(xmlGenericErrorContext,
11239 			    "PP: entering CONTENT\n");
11240 #endif
11241 		}
11242 		break;
11243 	    }
11244             case XML_PARSER_MISC:
11245 		SKIP_BLANKS;
11246 		if (ctxt->input->buf == NULL)
11247 		    avail = ctxt->input->length -
11248 		            (ctxt->input->cur - ctxt->input->base);
11249 		else
11250 		    avail = ctxt->input->buf->buffer->use -
11251 		            (ctxt->input->cur - ctxt->input->base);
11252 		if (avail < 2)
11253 		    goto done;
11254 		cur = ctxt->input->cur[0];
11255 		next = ctxt->input->cur[1];
11256 	        if ((cur == '<') && (next == '?')) {
11257 		    if ((!terminate) &&
11258 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11259 			goto done;
11260 #ifdef DEBUG_PUSH
11261 		    xmlGenericError(xmlGenericErrorContext,
11262 			    "PP: Parsing PI\n");
11263 #endif
11264 		    xmlParsePI(ctxt);
11265 		    if (ctxt->instate == XML_PARSER_EOF)
11266 			goto done;
11267 		    ctxt->checkIndex = 0;
11268 		} else if ((cur == '<') && (next == '!') &&
11269 		    (ctxt->input->cur[2] == '-') &&
11270 		    (ctxt->input->cur[3] == '-')) {
11271 		    if ((!terminate) &&
11272 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11273 			goto done;
11274 #ifdef DEBUG_PUSH
11275 		    xmlGenericError(xmlGenericErrorContext,
11276 			    "PP: Parsing Comment\n");
11277 #endif
11278 		    xmlParseComment(ctxt);
11279 		    if (ctxt->instate == XML_PARSER_EOF)
11280 			goto done;
11281 		    ctxt->instate = XML_PARSER_MISC;
11282 		    ctxt->checkIndex = 0;
11283 		} else if ((cur == '<') && (next == '!') &&
11284 		    (ctxt->input->cur[2] == 'D') &&
11285 		    (ctxt->input->cur[3] == 'O') &&
11286 		    (ctxt->input->cur[4] == 'C') &&
11287 		    (ctxt->input->cur[5] == 'T') &&
11288 		    (ctxt->input->cur[6] == 'Y') &&
11289 		    (ctxt->input->cur[7] == 'P') &&
11290 		    (ctxt->input->cur[8] == 'E')) {
11291 		    if ((!terminate) &&
11292 		        (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11293 			goto done;
11294 #ifdef DEBUG_PUSH
11295 		    xmlGenericError(xmlGenericErrorContext,
11296 			    "PP: Parsing internal subset\n");
11297 #endif
11298 		    ctxt->inSubset = 1;
11299 		    xmlParseDocTypeDecl(ctxt);
11300 		    if (ctxt->instate == XML_PARSER_EOF)
11301 			goto done;
11302 		    if (RAW == '[') {
11303 			ctxt->instate = XML_PARSER_DTD;
11304 #ifdef DEBUG_PUSH
11305 			xmlGenericError(xmlGenericErrorContext,
11306 				"PP: entering DTD\n");
11307 #endif
11308 		    } else {
11309 			/*
11310 			 * Create and update the external subset.
11311 			 */
11312 			ctxt->inSubset = 2;
11313 			if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11314 			    (ctxt->sax->externalSubset != NULL))
11315 			    ctxt->sax->externalSubset(ctxt->userData,
11316 				    ctxt->intSubName, ctxt->extSubSystem,
11317 				    ctxt->extSubURI);
11318 			ctxt->inSubset = 0;
11319 			xmlCleanSpecialAttr(ctxt);
11320 			ctxt->instate = XML_PARSER_PROLOG;
11321 #ifdef DEBUG_PUSH
11322 			xmlGenericError(xmlGenericErrorContext,
11323 				"PP: entering PROLOG\n");
11324 #endif
11325 		    }
11326 		} else if ((cur == '<') && (next == '!') &&
11327 		           (avail < 9)) {
11328 		    goto done;
11329 		} else {
11330 		    ctxt->instate = XML_PARSER_START_TAG;
11331 		    ctxt->progressive = 1;
11332 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11333 #ifdef DEBUG_PUSH
11334 		    xmlGenericError(xmlGenericErrorContext,
11335 			    "PP: entering START_TAG\n");
11336 #endif
11337 		}
11338 		break;
11339             case XML_PARSER_PROLOG:
11340 		SKIP_BLANKS;
11341 		if (ctxt->input->buf == NULL)
11342 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11343 		else
11344 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11345 		if (avail < 2)
11346 		    goto done;
11347 		cur = ctxt->input->cur[0];
11348 		next = ctxt->input->cur[1];
11349 	        if ((cur == '<') && (next == '?')) {
11350 		    if ((!terminate) &&
11351 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11352 			goto done;
11353 #ifdef DEBUG_PUSH
11354 		    xmlGenericError(xmlGenericErrorContext,
11355 			    "PP: Parsing PI\n");
11356 #endif
11357 		    xmlParsePI(ctxt);
11358 		    if (ctxt->instate == XML_PARSER_EOF)
11359 			goto done;
11360 		} else if ((cur == '<') && (next == '!') &&
11361 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11362 		    if ((!terminate) &&
11363 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11364 			goto done;
11365 #ifdef DEBUG_PUSH
11366 		    xmlGenericError(xmlGenericErrorContext,
11367 			    "PP: Parsing Comment\n");
11368 #endif
11369 		    xmlParseComment(ctxt);
11370 		    if (ctxt->instate == XML_PARSER_EOF)
11371 			goto done;
11372 		    ctxt->instate = XML_PARSER_PROLOG;
11373 		} else if ((cur == '<') && (next == '!') &&
11374 		           (avail < 4)) {
11375 		    goto done;
11376 		} else {
11377 		    ctxt->instate = XML_PARSER_START_TAG;
11378 		    if (ctxt->progressive == 0)
11379 			ctxt->progressive = 1;
11380 		    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11381 #ifdef DEBUG_PUSH
11382 		    xmlGenericError(xmlGenericErrorContext,
11383 			    "PP: entering START_TAG\n");
11384 #endif
11385 		}
11386 		break;
11387             case XML_PARSER_EPILOG:
11388 		SKIP_BLANKS;
11389 		if (ctxt->input->buf == NULL)
11390 		    avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11391 		else
11392 		    avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11393 		if (avail < 2)
11394 		    goto done;
11395 		cur = ctxt->input->cur[0];
11396 		next = ctxt->input->cur[1];
11397 	        if ((cur == '<') && (next == '?')) {
11398 		    if ((!terminate) &&
11399 		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11400 			goto done;
11401 #ifdef DEBUG_PUSH
11402 		    xmlGenericError(xmlGenericErrorContext,
11403 			    "PP: Parsing PI\n");
11404 #endif
11405 		    xmlParsePI(ctxt);
11406 		    if (ctxt->instate == XML_PARSER_EOF)
11407 			goto done;
11408 		    ctxt->instate = XML_PARSER_EPILOG;
11409 		} else if ((cur == '<') && (next == '!') &&
11410 		    (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11411 		    if ((!terminate) &&
11412 		        (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11413 			goto done;
11414 #ifdef DEBUG_PUSH
11415 		    xmlGenericError(xmlGenericErrorContext,
11416 			    "PP: Parsing Comment\n");
11417 #endif
11418 		    xmlParseComment(ctxt);
11419 		    if (ctxt->instate == XML_PARSER_EOF)
11420 			goto done;
11421 		    ctxt->instate = XML_PARSER_EPILOG;
11422 		} else if ((cur == '<') && (next == '!') &&
11423 		           (avail < 4)) {
11424 		    goto done;
11425 		} else {
11426 		    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11427 		    ctxt->instate = XML_PARSER_EOF;
11428 #ifdef DEBUG_PUSH
11429 		    xmlGenericError(xmlGenericErrorContext,
11430 			    "PP: entering EOF\n");
11431 #endif
11432 		    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11433 			ctxt->sax->endDocument(ctxt->userData);
11434 		    goto done;
11435 		}
11436 		break;
11437             case XML_PARSER_DTD: {
11438 	        /*
11439 		 * Sorry but progressive parsing of the internal subset
11440 		 * is not expected to be supported. We first check that
11441 		 * the full content of the internal subset is available and
11442 		 * the parsing is launched only at that point.
11443 		 * Internal subset ends up with "']' S? '>'" in an unescaped
11444 		 * section and not in a ']]>' sequence which are conditional
11445 		 * sections (whoever argued to keep that crap in XML deserve
11446 		 * a place in hell !).
11447 		 */
11448 		int base, i;
11449 		xmlChar *buf;
11450 	        xmlChar quote = 0;
11451 
11452 		base = ctxt->input->cur - ctxt->input->base;
11453 		if (base < 0) return(0);
11454 		if (ctxt->checkIndex > base)
11455 		    base = ctxt->checkIndex;
11456 		buf = ctxt->input->buf->buffer->content;
11457 		for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11458 		     base++) {
11459 		    if (quote != 0) {
11460 		        if (buf[base] == quote)
11461 			    quote = 0;
11462 			continue;
11463 		    }
11464 		    if ((quote == 0) && (buf[base] == '<')) {
11465 		        int found  = 0;
11466 			/* special handling of comments */
11467 		        if (((unsigned int) base + 4 <
11468 			     ctxt->input->buf->buffer->use) &&
11469 			    (buf[base + 1] == '!') &&
11470 			    (buf[base + 2] == '-') &&
11471 			    (buf[base + 3] == '-')) {
11472 			    for (;(unsigned int) base + 3 <
11473 			          ctxt->input->buf->buffer->use; base++) {
11474 				if ((buf[base] == '-') &&
11475 				    (buf[base + 1] == '-') &&
11476 				    (buf[base + 2] == '>')) {
11477 				    found = 1;
11478 				    base += 2;
11479 				    break;
11480 				}
11481 		            }
11482 			    if (!found) {
11483 #if 0
11484 			        fprintf(stderr, "unfinished comment\n");
11485 #endif
11486 			        break; /* for */
11487 		            }
11488 		            continue;
11489 			}
11490 		    }
11491 		    if (buf[base] == '"') {
11492 		        quote = '"';
11493 			continue;
11494 		    }
11495 		    if (buf[base] == '\'') {
11496 		        quote = '\'';
11497 			continue;
11498 		    }
11499 		    if (buf[base] == ']') {
11500 #if 0
11501 		        fprintf(stderr, "%c%c%c%c: ", buf[base],
11502 			        buf[base + 1], buf[base + 2], buf[base + 3]);
11503 #endif
11504 		        if ((unsigned int) base +1 >=
11505 		            ctxt->input->buf->buffer->use)
11506 			    break;
11507 			if (buf[base + 1] == ']') {
11508 			    /* conditional crap, skip both ']' ! */
11509 			    base++;
11510 			    continue;
11511 			}
11512 		        for (i = 1;
11513 		     (unsigned int) base + i < ctxt->input->buf->buffer->use;
11514 		             i++) {
11515 			    if (buf[base + i] == '>') {
11516 #if 0
11517 			        fprintf(stderr, "found\n");
11518 #endif
11519 			        goto found_end_int_subset;
11520 			    }
11521 			    if (!IS_BLANK_CH(buf[base + i])) {
11522 #if 0
11523 			        fprintf(stderr, "not found\n");
11524 #endif
11525 			        goto not_end_of_int_subset;
11526 			    }
11527 			}
11528 #if 0
11529 			fprintf(stderr, "end of stream\n");
11530 #endif
11531 		        break;
11532 
11533 		    }
11534 not_end_of_int_subset:
11535                     continue; /* for */
11536 		}
11537 		/*
11538 		 * We didn't find the end of the internal subset
11539 		 */
11540 #ifdef DEBUG_PUSH
11541 		if (next == 0)
11542 		    xmlGenericError(xmlGenericErrorContext,
11543 			    "PP: lookup of int subset end filed\n");
11544 #endif
11545 	        goto done;
11546 
11547 found_end_int_subset:
11548 		xmlParseInternalSubset(ctxt);
11549 		if (ctxt->instate == XML_PARSER_EOF)
11550 		    goto done;
11551 		ctxt->inSubset = 2;
11552 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11553 		    (ctxt->sax->externalSubset != NULL))
11554 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11555 			    ctxt->extSubSystem, ctxt->extSubURI);
11556 		ctxt->inSubset = 0;
11557 		xmlCleanSpecialAttr(ctxt);
11558 		if (ctxt->instate == XML_PARSER_EOF)
11559 		    goto done;
11560 		ctxt->instate = XML_PARSER_PROLOG;
11561 		ctxt->checkIndex = 0;
11562 #ifdef DEBUG_PUSH
11563 		xmlGenericError(xmlGenericErrorContext,
11564 			"PP: entering PROLOG\n");
11565 #endif
11566                 break;
11567 	    }
11568             case XML_PARSER_COMMENT:
11569 		xmlGenericError(xmlGenericErrorContext,
11570 			"PP: internal error, state == COMMENT\n");
11571 		ctxt->instate = XML_PARSER_CONTENT;
11572 #ifdef DEBUG_PUSH
11573 		xmlGenericError(xmlGenericErrorContext,
11574 			"PP: entering CONTENT\n");
11575 #endif
11576 		break;
11577             case XML_PARSER_IGNORE:
11578 		xmlGenericError(xmlGenericErrorContext,
11579 			"PP: internal error, state == IGNORE");
11580 	        ctxt->instate = XML_PARSER_DTD;
11581 #ifdef DEBUG_PUSH
11582 		xmlGenericError(xmlGenericErrorContext,
11583 			"PP: entering DTD\n");
11584 #endif
11585 	        break;
11586             case XML_PARSER_PI:
11587 		xmlGenericError(xmlGenericErrorContext,
11588 			"PP: internal error, state == PI\n");
11589 		ctxt->instate = XML_PARSER_CONTENT;
11590 #ifdef DEBUG_PUSH
11591 		xmlGenericError(xmlGenericErrorContext,
11592 			"PP: entering CONTENT\n");
11593 #endif
11594 		break;
11595             case XML_PARSER_ENTITY_DECL:
11596 		xmlGenericError(xmlGenericErrorContext,
11597 			"PP: internal error, state == ENTITY_DECL\n");
11598 		ctxt->instate = XML_PARSER_DTD;
11599 #ifdef DEBUG_PUSH
11600 		xmlGenericError(xmlGenericErrorContext,
11601 			"PP: entering DTD\n");
11602 #endif
11603 		break;
11604             case XML_PARSER_ENTITY_VALUE:
11605 		xmlGenericError(xmlGenericErrorContext,
11606 			"PP: internal error, state == ENTITY_VALUE\n");
11607 		ctxt->instate = XML_PARSER_CONTENT;
11608 #ifdef DEBUG_PUSH
11609 		xmlGenericError(xmlGenericErrorContext,
11610 			"PP: entering DTD\n");
11611 #endif
11612 		break;
11613             case XML_PARSER_ATTRIBUTE_VALUE:
11614 		xmlGenericError(xmlGenericErrorContext,
11615 			"PP: internal error, state == ATTRIBUTE_VALUE\n");
11616 		ctxt->instate = XML_PARSER_START_TAG;
11617 #ifdef DEBUG_PUSH
11618 		xmlGenericError(xmlGenericErrorContext,
11619 			"PP: entering START_TAG\n");
11620 #endif
11621 		break;
11622             case XML_PARSER_SYSTEM_LITERAL:
11623 		xmlGenericError(xmlGenericErrorContext,
11624 			"PP: internal error, state == SYSTEM_LITERAL\n");
11625 		ctxt->instate = XML_PARSER_START_TAG;
11626 #ifdef DEBUG_PUSH
11627 		xmlGenericError(xmlGenericErrorContext,
11628 			"PP: entering START_TAG\n");
11629 #endif
11630 		break;
11631             case XML_PARSER_PUBLIC_LITERAL:
11632 		xmlGenericError(xmlGenericErrorContext,
11633 			"PP: internal error, state == PUBLIC_LITERAL\n");
11634 		ctxt->instate = XML_PARSER_START_TAG;
11635 #ifdef DEBUG_PUSH
11636 		xmlGenericError(xmlGenericErrorContext,
11637 			"PP: entering START_TAG\n");
11638 #endif
11639 		break;
11640 	}
11641     }
11642 done:
11643 #ifdef DEBUG_PUSH
11644     xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11645 #endif
11646     return(ret);
11647 encoding_error:
11648     {
11649         char buffer[150];
11650 
11651 	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11652 			ctxt->input->cur[0], ctxt->input->cur[1],
11653 			ctxt->input->cur[2], ctxt->input->cur[3]);
11654 	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11655 		     "Input is not proper UTF-8, indicate encoding !\n%s",
11656 		     BAD_CAST buffer, NULL);
11657     }
11658     return(0);
11659 }
11660 
11661 /**
11662  * xmlParseChunk:
11663  * @ctxt:  an XML parser context
11664  * @chunk:  an char array
11665  * @size:  the size in byte of the chunk
11666  * @terminate:  last chunk indicator
11667  *
11668  * Parse a Chunk of memory
11669  *
11670  * Returns zero if no error, the xmlParserErrors otherwise.
11671  */
11672 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11673 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11674               int terminate) {
11675     int end_in_lf = 0;
11676     int remain = 0;
11677 
11678     if (ctxt == NULL)
11679         return(XML_ERR_INTERNAL_ERROR);
11680     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11681         return(ctxt->errNo);
11682     if (ctxt->instate == XML_PARSER_EOF)
11683         return(-1);
11684     if (ctxt->instate == XML_PARSER_START)
11685         xmlDetectSAX2(ctxt);
11686     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11687         (chunk[size - 1] == '\r')) {
11688 	end_in_lf = 1;
11689 	size--;
11690     }
11691 
11692 xmldecl_done:
11693 
11694     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11695         (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11696 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11697 	int cur = ctxt->input->cur - ctxt->input->base;
11698 	int res;
11699 
11700         /*
11701          * Specific handling if we autodetected an encoding, we should not
11702          * push more than the first line ... which depend on the encoding
11703          * And only push the rest once the final encoding was detected
11704          */
11705         if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11706             (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11707             unsigned int len = 45;
11708 
11709             if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11710                                BAD_CAST "UTF-16")) ||
11711                 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11712                                BAD_CAST "UTF16")))
11713                 len = 90;
11714             else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11715                                     BAD_CAST "UCS-4")) ||
11716                      (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11717                                     BAD_CAST "UCS4")))
11718                 len = 180;
11719 
11720             if (ctxt->input->buf->rawconsumed < len)
11721                 len -= ctxt->input->buf->rawconsumed;
11722 
11723             /*
11724              * Change size for reading the initial declaration only
11725              * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11726              * will blindly copy extra bytes from memory.
11727              */
11728             if (size > len) {
11729                 remain = size - len;
11730                 size = len;
11731             } else {
11732                 remain = 0;
11733             }
11734         }
11735 	res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11736 	if (res < 0) {
11737 	    ctxt->errNo = XML_PARSER_EOF;
11738 	    ctxt->disableSAX = 1;
11739 	    return (XML_PARSER_EOF);
11740 	}
11741 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11742 	ctxt->input->cur = ctxt->input->base + cur;
11743 	ctxt->input->end =
11744 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11745 #ifdef DEBUG_PUSH
11746 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11747 #endif
11748 
11749     } else if (ctxt->instate != XML_PARSER_EOF) {
11750 	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11751 	    xmlParserInputBufferPtr in = ctxt->input->buf;
11752 	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
11753 		    (in->raw != NULL)) {
11754 		int nbchars;
11755 
11756 		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11757 		if (nbchars < 0) {
11758 		    /* TODO 2.6.0 */
11759 		    xmlGenericError(xmlGenericErrorContext,
11760 				    "xmlParseChunk: encoder error\n");
11761 		    return(XML_ERR_INVALID_ENCODING);
11762 		}
11763 	    }
11764 	}
11765     }
11766     if (remain != 0)
11767         xmlParseTryOrFinish(ctxt, 0);
11768     else
11769         xmlParseTryOrFinish(ctxt, terminate);
11770     if (ctxt->instate == XML_PARSER_EOF)
11771         return(ctxt->errNo);
11772     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11773         return(ctxt->errNo);
11774 
11775     if (remain != 0) {
11776         chunk += size;
11777         size = remain;
11778         remain = 0;
11779         goto xmldecl_done;
11780     }
11781     if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11782         (ctxt->input->buf != NULL)) {
11783 	xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11784     }
11785     if (terminate) {
11786 	/*
11787 	 * Check for termination
11788 	 */
11789 	int avail = 0;
11790 
11791 	if (ctxt->input != NULL) {
11792 	    if (ctxt->input->buf == NULL)
11793 		avail = ctxt->input->length -
11794 			(ctxt->input->cur - ctxt->input->base);
11795 	    else
11796 		avail = ctxt->input->buf->buffer->use -
11797 			(ctxt->input->cur - ctxt->input->base);
11798 	}
11799 
11800 	if ((ctxt->instate != XML_PARSER_EOF) &&
11801 	    (ctxt->instate != XML_PARSER_EPILOG)) {
11802 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11803 	}
11804 	if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11805 	    xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11806 	}
11807 	if (ctxt->instate != XML_PARSER_EOF) {
11808 	    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11809 		ctxt->sax->endDocument(ctxt->userData);
11810 	}
11811 	ctxt->instate = XML_PARSER_EOF;
11812     }
11813     return((xmlParserErrors) ctxt->errNo);
11814 }
11815 
11816 /************************************************************************
11817  *									*
11818  * 		I/O front end functions to the parser			*
11819  *									*
11820  ************************************************************************/
11821 
11822 /**
11823  * xmlCreatePushParserCtxt:
11824  * @sax:  a SAX handler
11825  * @user_data:  The user data returned on SAX callbacks
11826  * @chunk:  a pointer to an array of chars
11827  * @size:  number of chars in the array
11828  * @filename:  an optional file name or URI
11829  *
11830  * Create a parser context for using the XML parser in push mode.
11831  * If @buffer and @size are non-NULL, the data is used to detect
11832  * the encoding.  The remaining characters will be parsed so they
11833  * don't need to be fed in again through xmlParseChunk.
11834  * To allow content encoding detection, @size should be >= 4
11835  * The value of @filename is used for fetching external entities
11836  * and error/warning reports.
11837  *
11838  * Returns the new parser context or NULL
11839  */
11840 
11841 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11842 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11843                         const char *chunk, int size, const char *filename) {
11844     xmlParserCtxtPtr ctxt;
11845     xmlParserInputPtr inputStream;
11846     xmlParserInputBufferPtr buf;
11847     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11848 
11849     /*
11850      * plug some encoding conversion routines
11851      */
11852     if ((chunk != NULL) && (size >= 4))
11853 	enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11854 
11855     buf = xmlAllocParserInputBuffer(enc);
11856     if (buf == NULL) return(NULL);
11857 
11858     ctxt = xmlNewParserCtxt();
11859     if (ctxt == NULL) {
11860         xmlErrMemory(NULL, "creating parser: out of memory\n");
11861 	xmlFreeParserInputBuffer(buf);
11862 	return(NULL);
11863     }
11864     ctxt->dictNames = 1;
11865     ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11866     if (ctxt->pushTab == NULL) {
11867         xmlErrMemory(ctxt, NULL);
11868 	xmlFreeParserInputBuffer(buf);
11869 	xmlFreeParserCtxt(ctxt);
11870 	return(NULL);
11871     }
11872     if (sax != NULL) {
11873 #ifdef LIBXML_SAX1_ENABLED
11874 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11875 #endif /* LIBXML_SAX1_ENABLED */
11876 	    xmlFree(ctxt->sax);
11877 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11878 	if (ctxt->sax == NULL) {
11879 	    xmlErrMemory(ctxt, NULL);
11880 	    xmlFreeParserInputBuffer(buf);
11881 	    xmlFreeParserCtxt(ctxt);
11882 	    return(NULL);
11883 	}
11884 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11885 	if (sax->initialized == XML_SAX2_MAGIC)
11886 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11887 	else
11888 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11889 	if (user_data != NULL)
11890 	    ctxt->userData = user_data;
11891     }
11892     if (filename == NULL) {
11893 	ctxt->directory = NULL;
11894     } else {
11895         ctxt->directory = xmlParserGetDirectory(filename);
11896     }
11897 
11898     inputStream = xmlNewInputStream(ctxt);
11899     if (inputStream == NULL) {
11900 	xmlFreeParserCtxt(ctxt);
11901 	xmlFreeParserInputBuffer(buf);
11902 	return(NULL);
11903     }
11904 
11905     if (filename == NULL)
11906 	inputStream->filename = NULL;
11907     else {
11908 	inputStream->filename = (char *)
11909 	    xmlCanonicPath((const xmlChar *) filename);
11910 	if (inputStream->filename == NULL) {
11911 	    xmlFreeParserCtxt(ctxt);
11912 	    xmlFreeParserInputBuffer(buf);
11913 	    return(NULL);
11914 	}
11915     }
11916     inputStream->buf = buf;
11917     inputStream->base = inputStream->buf->buffer->content;
11918     inputStream->cur = inputStream->buf->buffer->content;
11919     inputStream->end =
11920 	&inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11921 
11922     inputPush(ctxt, inputStream);
11923 
11924     /*
11925      * If the caller didn't provide an initial 'chunk' for determining
11926      * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11927      * that it can be automatically determined later
11928      */
11929     if ((size == 0) || (chunk == NULL)) {
11930 	ctxt->charset = XML_CHAR_ENCODING_NONE;
11931     } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11932 	int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11933 	int cur = ctxt->input->cur - ctxt->input->base;
11934 
11935 	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11936 
11937 	ctxt->input->base = ctxt->input->buf->buffer->content + base;
11938 	ctxt->input->cur = ctxt->input->base + cur;
11939 	ctxt->input->end =
11940 	    &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11941 #ifdef DEBUG_PUSH
11942 	xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11943 #endif
11944     }
11945 
11946     if (enc != XML_CHAR_ENCODING_NONE) {
11947         xmlSwitchEncoding(ctxt, enc);
11948     }
11949 
11950     return(ctxt);
11951 }
11952 #endif /* LIBXML_PUSH_ENABLED */
11953 
11954 /**
11955  * xmlStopParser:
11956  * @ctxt:  an XML parser context
11957  *
11958  * Blocks further parser processing
11959  */
11960 void
xmlStopParser(xmlParserCtxtPtr ctxt)11961 xmlStopParser(xmlParserCtxtPtr ctxt) {
11962     if (ctxt == NULL)
11963         return;
11964     ctxt->instate = XML_PARSER_EOF;
11965     ctxt->errNo = XML_ERR_USER_STOP;
11966     ctxt->disableSAX = 1;
11967     if (ctxt->input != NULL) {
11968 	ctxt->input->cur = BAD_CAST"";
11969 	ctxt->input->base = ctxt->input->cur;
11970     }
11971 }
11972 
11973 /**
11974  * xmlCreateIOParserCtxt:
11975  * @sax:  a SAX handler
11976  * @user_data:  The user data returned on SAX callbacks
11977  * @ioread:  an I/O read function
11978  * @ioclose:  an I/O close function
11979  * @ioctx:  an I/O handler
11980  * @enc:  the charset encoding if known
11981  *
11982  * Create a parser context for using the XML parser with an existing
11983  * I/O stream
11984  *
11985  * Returns the new parser context or NULL
11986  */
11987 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11988 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11989 	xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
11990 	void *ioctx, xmlCharEncoding enc) {
11991     xmlParserCtxtPtr ctxt;
11992     xmlParserInputPtr inputStream;
11993     xmlParserInputBufferPtr buf;
11994 
11995     if (ioread == NULL) return(NULL);
11996 
11997     buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11998     if (buf == NULL) return(NULL);
11999 
12000     ctxt = xmlNewParserCtxt();
12001     if (ctxt == NULL) {
12002 	xmlFreeParserInputBuffer(buf);
12003 	return(NULL);
12004     }
12005     if (sax != NULL) {
12006 #ifdef LIBXML_SAX1_ENABLED
12007 	if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12008 #endif /* LIBXML_SAX1_ENABLED */
12009 	    xmlFree(ctxt->sax);
12010 	ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12011 	if (ctxt->sax == NULL) {
12012 	    xmlErrMemory(ctxt, NULL);
12013 	    xmlFreeParserCtxt(ctxt);
12014 	    return(NULL);
12015 	}
12016 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12017 	if (sax->initialized == XML_SAX2_MAGIC)
12018 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12019 	else
12020 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12021 	if (user_data != NULL)
12022 	    ctxt->userData = user_data;
12023     }
12024 
12025     inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12026     if (inputStream == NULL) {
12027 	xmlFreeParserCtxt(ctxt);
12028 	return(NULL);
12029     }
12030     inputPush(ctxt, inputStream);
12031 
12032     return(ctxt);
12033 }
12034 
12035 #ifdef LIBXML_VALID_ENABLED
12036 /************************************************************************
12037  *									*
12038  * 		Front ends when parsing a DTD				*
12039  *									*
12040  ************************************************************************/
12041 
12042 /**
12043  * xmlIOParseDTD:
12044  * @sax:  the SAX handler block or NULL
12045  * @input:  an Input Buffer
12046  * @enc:  the charset encoding if known
12047  *
12048  * Load and parse a DTD
12049  *
12050  * Returns the resulting xmlDtdPtr or NULL in case of error.
12051  * @input will be freed by the function in any case.
12052  */
12053 
12054 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12055 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12056 	      xmlCharEncoding enc) {
12057     xmlDtdPtr ret = NULL;
12058     xmlParserCtxtPtr ctxt;
12059     xmlParserInputPtr pinput = NULL;
12060     xmlChar start[4];
12061 
12062     if (input == NULL)
12063 	return(NULL);
12064 
12065     ctxt = xmlNewParserCtxt();
12066     if (ctxt == NULL) {
12067         xmlFreeParserInputBuffer(input);
12068 	return(NULL);
12069     }
12070 
12071     /*
12072      * Set-up the SAX context
12073      */
12074     if (sax != NULL) {
12075 	if (ctxt->sax != NULL)
12076 	    xmlFree(ctxt->sax);
12077         ctxt->sax = sax;
12078         ctxt->userData = ctxt;
12079     }
12080     xmlDetectSAX2(ctxt);
12081 
12082     /*
12083      * generate a parser input from the I/O handler
12084      */
12085 
12086     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12087     if (pinput == NULL) {
12088         if (sax != NULL) ctxt->sax = NULL;
12089         xmlFreeParserInputBuffer(input);
12090 	xmlFreeParserCtxt(ctxt);
12091 	return(NULL);
12092     }
12093 
12094     /*
12095      * plug some encoding conversion routines here.
12096      */
12097     if (xmlPushInput(ctxt, pinput) < 0) {
12098         if (sax != NULL) ctxt->sax = NULL;
12099 	xmlFreeParserCtxt(ctxt);
12100 	return(NULL);
12101     }
12102     if (enc != XML_CHAR_ENCODING_NONE) {
12103         xmlSwitchEncoding(ctxt, enc);
12104     }
12105 
12106     pinput->filename = NULL;
12107     pinput->line = 1;
12108     pinput->col = 1;
12109     pinput->base = ctxt->input->cur;
12110     pinput->cur = ctxt->input->cur;
12111     pinput->free = NULL;
12112 
12113     /*
12114      * let's parse that entity knowing it's an external subset.
12115      */
12116     ctxt->inSubset = 2;
12117     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12118     if (ctxt->myDoc == NULL) {
12119 	xmlErrMemory(ctxt, "New Doc failed");
12120 	return(NULL);
12121     }
12122     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12123     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12124 	                               BAD_CAST "none", BAD_CAST "none");
12125 
12126     if ((enc == XML_CHAR_ENCODING_NONE) &&
12127         ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12128 	/*
12129 	 * Get the 4 first bytes and decode the charset
12130 	 * if enc != XML_CHAR_ENCODING_NONE
12131 	 * plug some encoding conversion routines.
12132 	 */
12133 	start[0] = RAW;
12134 	start[1] = NXT(1);
12135 	start[2] = NXT(2);
12136 	start[3] = NXT(3);
12137 	enc = xmlDetectCharEncoding(start, 4);
12138 	if (enc != XML_CHAR_ENCODING_NONE) {
12139 	    xmlSwitchEncoding(ctxt, enc);
12140 	}
12141     }
12142 
12143     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12144 
12145     if (ctxt->myDoc != NULL) {
12146 	if (ctxt->wellFormed) {
12147 	    ret = ctxt->myDoc->extSubset;
12148 	    ctxt->myDoc->extSubset = NULL;
12149 	    if (ret != NULL) {
12150 		xmlNodePtr tmp;
12151 
12152 		ret->doc = NULL;
12153 		tmp = ret->children;
12154 		while (tmp != NULL) {
12155 		    tmp->doc = NULL;
12156 		    tmp = tmp->next;
12157 		}
12158 	    }
12159 	} else {
12160 	    ret = NULL;
12161 	}
12162         xmlFreeDoc(ctxt->myDoc);
12163         ctxt->myDoc = NULL;
12164     }
12165     if (sax != NULL) ctxt->sax = NULL;
12166     xmlFreeParserCtxt(ctxt);
12167 
12168     return(ret);
12169 }
12170 
12171 /**
12172  * xmlSAXParseDTD:
12173  * @sax:  the SAX handler block
12174  * @ExternalID:  a NAME* containing the External ID of the DTD
12175  * @SystemID:  a NAME* containing the URL to the DTD
12176  *
12177  * Load and parse an external subset.
12178  *
12179  * Returns the resulting xmlDtdPtr or NULL in case of error.
12180  */
12181 
12182 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12183 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12184                           const xmlChar *SystemID) {
12185     xmlDtdPtr ret = NULL;
12186     xmlParserCtxtPtr ctxt;
12187     xmlParserInputPtr input = NULL;
12188     xmlCharEncoding enc;
12189     xmlChar* systemIdCanonic;
12190 
12191     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12192 
12193     ctxt = xmlNewParserCtxt();
12194     if (ctxt == NULL) {
12195 	return(NULL);
12196     }
12197 
12198     /*
12199      * Set-up the SAX context
12200      */
12201     if (sax != NULL) {
12202 	if (ctxt->sax != NULL)
12203 	    xmlFree(ctxt->sax);
12204         ctxt->sax = sax;
12205         ctxt->userData = ctxt;
12206     }
12207 
12208     /*
12209      * Canonicalise the system ID
12210      */
12211     systemIdCanonic = xmlCanonicPath(SystemID);
12212     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12213 	xmlFreeParserCtxt(ctxt);
12214 	return(NULL);
12215     }
12216 
12217     /*
12218      * Ask the Entity resolver to load the damn thing
12219      */
12220 
12221     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12222 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12223 	                                 systemIdCanonic);
12224     if (input == NULL) {
12225         if (sax != NULL) ctxt->sax = NULL;
12226 	xmlFreeParserCtxt(ctxt);
12227 	if (systemIdCanonic != NULL)
12228 	    xmlFree(systemIdCanonic);
12229 	return(NULL);
12230     }
12231 
12232     /*
12233      * plug some encoding conversion routines here.
12234      */
12235     if (xmlPushInput(ctxt, input) < 0) {
12236         if (sax != NULL) ctxt->sax = NULL;
12237 	xmlFreeParserCtxt(ctxt);
12238 	if (systemIdCanonic != NULL)
12239 	    xmlFree(systemIdCanonic);
12240 	return(NULL);
12241     }
12242     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12243 	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12244 	xmlSwitchEncoding(ctxt, enc);
12245     }
12246 
12247     if (input->filename == NULL)
12248 	input->filename = (char *) systemIdCanonic;
12249     else
12250 	xmlFree(systemIdCanonic);
12251     input->line = 1;
12252     input->col = 1;
12253     input->base = ctxt->input->cur;
12254     input->cur = ctxt->input->cur;
12255     input->free = NULL;
12256 
12257     /*
12258      * let's parse that entity knowing it's an external subset.
12259      */
12260     ctxt->inSubset = 2;
12261     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12262     if (ctxt->myDoc == NULL) {
12263 	xmlErrMemory(ctxt, "New Doc failed");
12264         if (sax != NULL) ctxt->sax = NULL;
12265 	xmlFreeParserCtxt(ctxt);
12266 	return(NULL);
12267     }
12268     ctxt->myDoc->properties = XML_DOC_INTERNAL;
12269     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12270 	                               ExternalID, SystemID);
12271     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12272 
12273     if (ctxt->myDoc != NULL) {
12274 	if (ctxt->wellFormed) {
12275 	    ret = ctxt->myDoc->extSubset;
12276 	    ctxt->myDoc->extSubset = NULL;
12277 	    if (ret != NULL) {
12278 		xmlNodePtr tmp;
12279 
12280 		ret->doc = NULL;
12281 		tmp = ret->children;
12282 		while (tmp != NULL) {
12283 		    tmp->doc = NULL;
12284 		    tmp = tmp->next;
12285 		}
12286 	    }
12287 	} else {
12288 	    ret = NULL;
12289 	}
12290         xmlFreeDoc(ctxt->myDoc);
12291         ctxt->myDoc = NULL;
12292     }
12293     if (sax != NULL) ctxt->sax = NULL;
12294     xmlFreeParserCtxt(ctxt);
12295 
12296     return(ret);
12297 }
12298 
12299 
12300 /**
12301  * xmlParseDTD:
12302  * @ExternalID:  a NAME* containing the External ID of the DTD
12303  * @SystemID:  a NAME* containing the URL to the DTD
12304  *
12305  * Load and parse an external subset.
12306  *
12307  * Returns the resulting xmlDtdPtr or NULL in case of error.
12308  */
12309 
12310 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12311 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12312     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12313 }
12314 #endif /* LIBXML_VALID_ENABLED */
12315 
12316 /************************************************************************
12317  *									*
12318  * 		Front ends when parsing an Entity			*
12319  *									*
12320  ************************************************************************/
12321 
12322 /**
12323  * xmlParseCtxtExternalEntity:
12324  * @ctx:  the existing parsing context
12325  * @URL:  the URL for the entity to load
12326  * @ID:  the System ID for the entity to load
12327  * @lst:  the return value for the set of parsed nodes
12328  *
12329  * Parse an external general entity within an existing parsing context
12330  * An external general parsed entity is well-formed if it matches the
12331  * production labeled extParsedEnt.
12332  *
12333  * [78] extParsedEnt ::= TextDecl? content
12334  *
12335  * Returns 0 if the entity is well formed, -1 in case of args problem and
12336  *    the parser error code otherwise
12337  */
12338 
12339 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12340 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12341 	               const xmlChar *ID, xmlNodePtr *lst) {
12342     xmlParserCtxtPtr ctxt;
12343     xmlDocPtr newDoc;
12344     xmlNodePtr newRoot;
12345     xmlSAXHandlerPtr oldsax = NULL;
12346     int ret = 0;
12347     xmlChar start[4];
12348     xmlCharEncoding enc;
12349 
12350     if (ctx == NULL) return(-1);
12351 
12352     if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12353         (ctx->depth > 1024)) {
12354 	return(XML_ERR_ENTITY_LOOP);
12355     }
12356 
12357     if (lst != NULL)
12358         *lst = NULL;
12359     if ((URL == NULL) && (ID == NULL))
12360 	return(-1);
12361     if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12362 	return(-1);
12363 
12364     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12365     if (ctxt == NULL) {
12366 	return(-1);
12367     }
12368 
12369     oldsax = ctxt->sax;
12370     ctxt->sax = ctx->sax;
12371     xmlDetectSAX2(ctxt);
12372     newDoc = xmlNewDoc(BAD_CAST "1.0");
12373     if (newDoc == NULL) {
12374 	xmlFreeParserCtxt(ctxt);
12375 	return(-1);
12376     }
12377     newDoc->properties = XML_DOC_INTERNAL;
12378     if (ctx->myDoc->dict) {
12379 	newDoc->dict = ctx->myDoc->dict;
12380 	xmlDictReference(newDoc->dict);
12381     }
12382     if (ctx->myDoc != NULL) {
12383 	newDoc->intSubset = ctx->myDoc->intSubset;
12384 	newDoc->extSubset = ctx->myDoc->extSubset;
12385     }
12386     if (ctx->myDoc->URL != NULL) {
12387 	newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12388     }
12389     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12390     if (newRoot == NULL) {
12391 	ctxt->sax = oldsax;
12392 	xmlFreeParserCtxt(ctxt);
12393 	newDoc->intSubset = NULL;
12394 	newDoc->extSubset = NULL;
12395         xmlFreeDoc(newDoc);
12396 	return(-1);
12397     }
12398     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12399     nodePush(ctxt, newDoc->children);
12400     if (ctx->myDoc == NULL) {
12401 	ctxt->myDoc = newDoc;
12402     } else {
12403 	ctxt->myDoc = ctx->myDoc;
12404 	newDoc->children->doc = ctx->myDoc;
12405     }
12406 
12407     /*
12408      * Get the 4 first bytes and decode the charset
12409      * if enc != XML_CHAR_ENCODING_NONE
12410      * plug some encoding conversion routines.
12411      */
12412     GROW
12413     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12414 	start[0] = RAW;
12415 	start[1] = NXT(1);
12416 	start[2] = NXT(2);
12417 	start[3] = NXT(3);
12418 	enc = xmlDetectCharEncoding(start, 4);
12419 	if (enc != XML_CHAR_ENCODING_NONE) {
12420 	    xmlSwitchEncoding(ctxt, enc);
12421 	}
12422     }
12423 
12424     /*
12425      * Parse a possible text declaration first
12426      */
12427     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12428 	xmlParseTextDecl(ctxt);
12429 	/*
12430 	 * An XML-1.0 document can't reference an entity not XML-1.0
12431 	 */
12432 	if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12433 	    (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12434 	    xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12435 	                   "Version mismatch between document and entity\n");
12436 	}
12437     }
12438 
12439     /*
12440      * Doing validity checking on chunk doesn't make sense
12441      */
12442     ctxt->instate = XML_PARSER_CONTENT;
12443     ctxt->validate = ctx->validate;
12444     ctxt->valid = ctx->valid;
12445     ctxt->loadsubset = ctx->loadsubset;
12446     ctxt->depth = ctx->depth + 1;
12447     ctxt->replaceEntities = ctx->replaceEntities;
12448     if (ctxt->validate) {
12449 	ctxt->vctxt.error = ctx->vctxt.error;
12450 	ctxt->vctxt.warning = ctx->vctxt.warning;
12451     } else {
12452 	ctxt->vctxt.error = NULL;
12453 	ctxt->vctxt.warning = NULL;
12454     }
12455     ctxt->vctxt.nodeTab = NULL;
12456     ctxt->vctxt.nodeNr = 0;
12457     ctxt->vctxt.nodeMax = 0;
12458     ctxt->vctxt.node = NULL;
12459     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12460     ctxt->dict = ctx->dict;
12461     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12462     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12463     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12464     ctxt->dictNames = ctx->dictNames;
12465     ctxt->attsDefault = ctx->attsDefault;
12466     ctxt->attsSpecial = ctx->attsSpecial;
12467     ctxt->linenumbers = ctx->linenumbers;
12468 
12469     xmlParseContent(ctxt);
12470 
12471     ctx->validate = ctxt->validate;
12472     ctx->valid = ctxt->valid;
12473     if ((RAW == '<') && (NXT(1) == '/')) {
12474 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12475     } else if (RAW != 0) {
12476 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12477     }
12478     if (ctxt->node != newDoc->children) {
12479 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12480     }
12481 
12482     if (!ctxt->wellFormed) {
12483         if (ctxt->errNo == 0)
12484 	    ret = 1;
12485 	else
12486 	    ret = ctxt->errNo;
12487     } else {
12488 	if (lst != NULL) {
12489 	    xmlNodePtr cur;
12490 
12491 	    /*
12492 	     * Return the newly created nodeset after unlinking it from
12493 	     * they pseudo parent.
12494 	     */
12495 	    cur = newDoc->children->children;
12496 	    *lst = cur;
12497 	    while (cur != NULL) {
12498 		cur->parent = NULL;
12499 		cur = cur->next;
12500 	    }
12501             newDoc->children->children = NULL;
12502 	}
12503 	ret = 0;
12504     }
12505     ctxt->sax = oldsax;
12506     ctxt->dict = NULL;
12507     ctxt->attsDefault = NULL;
12508     ctxt->attsSpecial = NULL;
12509     xmlFreeParserCtxt(ctxt);
12510     newDoc->intSubset = NULL;
12511     newDoc->extSubset = NULL;
12512     xmlFreeDoc(newDoc);
12513 
12514     return(ret);
12515 }
12516 
12517 /**
12518  * xmlParseExternalEntityPrivate:
12519  * @doc:  the document the chunk pertains to
12520  * @oldctxt:  the previous parser context if available
12521  * @sax:  the SAX handler bloc (possibly NULL)
12522  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12523  * @depth:  Used for loop detection, use 0
12524  * @URL:  the URL for the entity to load
12525  * @ID:  the System ID for the entity to load
12526  * @list:  the return value for the set of parsed nodes
12527  *
12528  * Private version of xmlParseExternalEntity()
12529  *
12530  * Returns 0 if the entity is well formed, -1 in case of args problem and
12531  *    the parser error code otherwise
12532  */
12533 
12534 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12535 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12536 	              xmlSAXHandlerPtr sax,
12537 		      void *user_data, int depth, const xmlChar *URL,
12538 		      const xmlChar *ID, xmlNodePtr *list) {
12539     xmlParserCtxtPtr ctxt;
12540     xmlDocPtr newDoc;
12541     xmlNodePtr newRoot;
12542     xmlSAXHandlerPtr oldsax = NULL;
12543     xmlParserErrors ret = XML_ERR_OK;
12544     xmlChar start[4];
12545     xmlCharEncoding enc;
12546 
12547     if (((depth > 40) &&
12548 	((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12549 	(depth > 1024)) {
12550 	return(XML_ERR_ENTITY_LOOP);
12551     }
12552 
12553     if (list != NULL)
12554         *list = NULL;
12555     if ((URL == NULL) && (ID == NULL))
12556 	return(XML_ERR_INTERNAL_ERROR);
12557     if (doc == NULL)
12558 	return(XML_ERR_INTERNAL_ERROR);
12559 
12560 
12561     ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12562     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12563     ctxt->userData = ctxt;
12564     if (oldctxt != NULL) {
12565 	ctxt->_private = oldctxt->_private;
12566 	ctxt->loadsubset = oldctxt->loadsubset;
12567 	ctxt->validate = oldctxt->validate;
12568 	ctxt->external = oldctxt->external;
12569 	ctxt->record_info = oldctxt->record_info;
12570 	ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12571 	ctxt->node_seq.length = oldctxt->node_seq.length;
12572 	ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12573     } else {
12574 	/*
12575 	 * Doing validity checking on chunk without context
12576 	 * doesn't make sense
12577 	 */
12578 	ctxt->_private = NULL;
12579 	ctxt->validate = 0;
12580 	ctxt->external = 2;
12581 	ctxt->loadsubset = 0;
12582     }
12583     if (sax != NULL) {
12584 	oldsax = ctxt->sax;
12585         ctxt->sax = sax;
12586 	if (user_data != NULL)
12587 	    ctxt->userData = user_data;
12588     }
12589     xmlDetectSAX2(ctxt);
12590     newDoc = xmlNewDoc(BAD_CAST "1.0");
12591     if (newDoc == NULL) {
12592 	ctxt->node_seq.maximum = 0;
12593 	ctxt->node_seq.length = 0;
12594 	ctxt->node_seq.buffer = NULL;
12595 	xmlFreeParserCtxt(ctxt);
12596 	return(XML_ERR_INTERNAL_ERROR);
12597     }
12598     newDoc->properties = XML_DOC_INTERNAL;
12599     newDoc->intSubset = doc->intSubset;
12600     newDoc->extSubset = doc->extSubset;
12601     newDoc->dict = doc->dict;
12602     xmlDictReference(newDoc->dict);
12603 
12604     if (doc->URL != NULL) {
12605 	newDoc->URL = xmlStrdup(doc->URL);
12606     }
12607     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12608     if (newRoot == NULL) {
12609 	if (sax != NULL)
12610 	    ctxt->sax = oldsax;
12611 	ctxt->node_seq.maximum = 0;
12612 	ctxt->node_seq.length = 0;
12613 	ctxt->node_seq.buffer = NULL;
12614 	xmlFreeParserCtxt(ctxt);
12615 	newDoc->intSubset = NULL;
12616 	newDoc->extSubset = NULL;
12617         xmlFreeDoc(newDoc);
12618 	return(XML_ERR_INTERNAL_ERROR);
12619     }
12620     xmlAddChild((xmlNodePtr) newDoc, newRoot);
12621     nodePush(ctxt, newDoc->children);
12622     ctxt->myDoc = doc;
12623     newRoot->doc = doc;
12624 
12625     /*
12626      * Get the 4 first bytes and decode the charset
12627      * if enc != XML_CHAR_ENCODING_NONE
12628      * plug some encoding conversion routines.
12629      */
12630     GROW;
12631     if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12632 	start[0] = RAW;
12633 	start[1] = NXT(1);
12634 	start[2] = NXT(2);
12635 	start[3] = NXT(3);
12636 	enc = xmlDetectCharEncoding(start, 4);
12637 	if (enc != XML_CHAR_ENCODING_NONE) {
12638 	    xmlSwitchEncoding(ctxt, enc);
12639 	}
12640     }
12641 
12642     /*
12643      * Parse a possible text declaration first
12644      */
12645     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12646 	xmlParseTextDecl(ctxt);
12647     }
12648 
12649     ctxt->instate = XML_PARSER_CONTENT;
12650     ctxt->depth = depth;
12651 
12652     xmlParseContent(ctxt);
12653 
12654     if ((RAW == '<') && (NXT(1) == '/')) {
12655 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12656     } else if (RAW != 0) {
12657 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12658     }
12659     if (ctxt->node != newDoc->children) {
12660 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12661     }
12662 
12663     if (!ctxt->wellFormed) {
12664         if (ctxt->errNo == 0)
12665 	    ret = XML_ERR_INTERNAL_ERROR;
12666 	else
12667 	    ret = (xmlParserErrors)ctxt->errNo;
12668     } else {
12669 	if (list != NULL) {
12670 	    xmlNodePtr cur;
12671 
12672 	    /*
12673 	     * Return the newly created nodeset after unlinking it from
12674 	     * they pseudo parent.
12675 	     */
12676 	    cur = newDoc->children->children;
12677 	    *list = cur;
12678 	    while (cur != NULL) {
12679 		cur->parent = NULL;
12680 		cur = cur->next;
12681 	    }
12682             newDoc->children->children = NULL;
12683 	}
12684 	ret = XML_ERR_OK;
12685     }
12686 
12687     /*
12688      * Record in the parent context the number of entities replacement
12689      * done when parsing that reference.
12690      */
12691     if (oldctxt != NULL)
12692         oldctxt->nbentities += ctxt->nbentities;
12693 
12694     /*
12695      * Also record the size of the entity parsed
12696      */
12697     if (ctxt->input != NULL) {
12698 	oldctxt->sizeentities += ctxt->input->consumed;
12699 	oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12700     }
12701     /*
12702      * And record the last error if any
12703      */
12704     if (ctxt->lastError.code != XML_ERR_OK)
12705         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12706 
12707     if (sax != NULL)
12708 	ctxt->sax = oldsax;
12709     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12710     oldctxt->node_seq.length = ctxt->node_seq.length;
12711     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12712     ctxt->node_seq.maximum = 0;
12713     ctxt->node_seq.length = 0;
12714     ctxt->node_seq.buffer = NULL;
12715     xmlFreeParserCtxt(ctxt);
12716     newDoc->intSubset = NULL;
12717     newDoc->extSubset = NULL;
12718     xmlFreeDoc(newDoc);
12719 
12720     return(ret);
12721 }
12722 
12723 #ifdef LIBXML_SAX1_ENABLED
12724 /**
12725  * xmlParseExternalEntity:
12726  * @doc:  the document the chunk pertains to
12727  * @sax:  the SAX handler bloc (possibly NULL)
12728  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12729  * @depth:  Used for loop detection, use 0
12730  * @URL:  the URL for the entity to load
12731  * @ID:  the System ID for the entity to load
12732  * @lst:  the return value for the set of parsed nodes
12733  *
12734  * Parse an external general entity
12735  * An external general parsed entity is well-formed if it matches the
12736  * production labeled extParsedEnt.
12737  *
12738  * [78] extParsedEnt ::= TextDecl? content
12739  *
12740  * Returns 0 if the entity is well formed, -1 in case of args problem and
12741  *    the parser error code otherwise
12742  */
12743 
12744 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12745 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12746 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12747     return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12748 		                       ID, lst));
12749 }
12750 
12751 /**
12752  * xmlParseBalancedChunkMemory:
12753  * @doc:  the document the chunk pertains to
12754  * @sax:  the SAX handler bloc (possibly NULL)
12755  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12756  * @depth:  Used for loop detection, use 0
12757  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12758  * @lst:  the return value for the set of parsed nodes
12759  *
12760  * Parse a well-balanced chunk of an XML document
12761  * called by the parser
12762  * The allowed sequence for the Well Balanced Chunk is the one defined by
12763  * the content production in the XML grammar:
12764  *
12765  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12766  *
12767  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12768  *    the parser error code otherwise
12769  */
12770 
12771 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12772 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12773      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12774     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12775                                                 depth, string, lst, 0 );
12776 }
12777 #endif /* LIBXML_SAX1_ENABLED */
12778 
12779 /**
12780  * xmlParseBalancedChunkMemoryInternal:
12781  * @oldctxt:  the existing parsing context
12782  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12783  * @user_data:  the user data field for the parser context
12784  * @lst:  the return value for the set of parsed nodes
12785  *
12786  *
12787  * Parse a well-balanced chunk of an XML document
12788  * called by the parser
12789  * The allowed sequence for the Well Balanced Chunk is the one defined by
12790  * the content production in the XML grammar:
12791  *
12792  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12793  *
12794  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12795  * error code otherwise
12796  *
12797  * In case recover is set to 1, the nodelist will not be empty even if
12798  * the parsed chunk is not well balanced.
12799  */
12800 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12801 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12802 	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12803     xmlParserCtxtPtr ctxt;
12804     xmlDocPtr newDoc = NULL;
12805     xmlNodePtr newRoot;
12806     xmlSAXHandlerPtr oldsax = NULL;
12807     xmlNodePtr content = NULL;
12808     xmlNodePtr last = NULL;
12809     int size;
12810     xmlParserErrors ret = XML_ERR_OK;
12811 #ifdef SAX2
12812     int i;
12813 #endif
12814 
12815     if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12816         (oldctxt->depth >  1024)) {
12817 	return(XML_ERR_ENTITY_LOOP);
12818     }
12819 
12820 
12821     if (lst != NULL)
12822         *lst = NULL;
12823     if (string == NULL)
12824         return(XML_ERR_INTERNAL_ERROR);
12825 
12826     size = xmlStrlen(string);
12827 
12828     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12829     if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12830     if (user_data != NULL)
12831 	ctxt->userData = user_data;
12832     else
12833 	ctxt->userData = ctxt;
12834     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12835     ctxt->dict = oldctxt->dict;
12836     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12837     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12838     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12839 
12840 #ifdef SAX2
12841     /* propagate namespaces down the entity */
12842     for (i = 0;i < oldctxt->nsNr;i += 2) {
12843         nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12844     }
12845 #endif
12846 
12847     oldsax = ctxt->sax;
12848     ctxt->sax = oldctxt->sax;
12849     xmlDetectSAX2(ctxt);
12850     ctxt->replaceEntities = oldctxt->replaceEntities;
12851     ctxt->options = oldctxt->options;
12852 
12853     ctxt->_private = oldctxt->_private;
12854     if (oldctxt->myDoc == NULL) {
12855 	newDoc = xmlNewDoc(BAD_CAST "1.0");
12856 	if (newDoc == NULL) {
12857 	    ctxt->sax = oldsax;
12858 	    ctxt->dict = NULL;
12859 	    xmlFreeParserCtxt(ctxt);
12860 	    return(XML_ERR_INTERNAL_ERROR);
12861 	}
12862 	newDoc->properties = XML_DOC_INTERNAL;
12863 	newDoc->dict = ctxt->dict;
12864 	xmlDictReference(newDoc->dict);
12865 	ctxt->myDoc = newDoc;
12866     } else {
12867 	ctxt->myDoc = oldctxt->myDoc;
12868         content = ctxt->myDoc->children;
12869 	last = ctxt->myDoc->last;
12870     }
12871     newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12872     if (newRoot == NULL) {
12873 	ctxt->sax = oldsax;
12874 	ctxt->dict = NULL;
12875 	xmlFreeParserCtxt(ctxt);
12876 	if (newDoc != NULL) {
12877 	    xmlFreeDoc(newDoc);
12878 	}
12879 	return(XML_ERR_INTERNAL_ERROR);
12880     }
12881     ctxt->myDoc->children = NULL;
12882     ctxt->myDoc->last = NULL;
12883     xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12884     nodePush(ctxt, ctxt->myDoc->children);
12885     ctxt->instate = XML_PARSER_CONTENT;
12886     ctxt->depth = oldctxt->depth + 1;
12887 
12888     ctxt->validate = 0;
12889     ctxt->loadsubset = oldctxt->loadsubset;
12890     if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12891 	/*
12892 	 * ID/IDREF registration will be done in xmlValidateElement below
12893 	 */
12894 	ctxt->loadsubset |= XML_SKIP_IDS;
12895     }
12896     ctxt->dictNames = oldctxt->dictNames;
12897     ctxt->attsDefault = oldctxt->attsDefault;
12898     ctxt->attsSpecial = oldctxt->attsSpecial;
12899 
12900     xmlParseContent(ctxt);
12901     if ((RAW == '<') && (NXT(1) == '/')) {
12902 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12903     } else if (RAW != 0) {
12904 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12905     }
12906     if (ctxt->node != ctxt->myDoc->children) {
12907 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12908     }
12909 
12910     if (!ctxt->wellFormed) {
12911         if (ctxt->errNo == 0)
12912 	    ret = XML_ERR_INTERNAL_ERROR;
12913 	else
12914 	    ret = (xmlParserErrors)ctxt->errNo;
12915     } else {
12916       ret = XML_ERR_OK;
12917     }
12918 
12919     if ((lst != NULL) && (ret == XML_ERR_OK)) {
12920 	xmlNodePtr cur;
12921 
12922 	/*
12923 	 * Return the newly created nodeset after unlinking it from
12924 	 * they pseudo parent.
12925 	 */
12926 	cur = ctxt->myDoc->children->children;
12927 	*lst = cur;
12928 	while (cur != NULL) {
12929 #ifdef LIBXML_VALID_ENABLED
12930 	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12931 		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12932 		(cur->type == XML_ELEMENT_NODE)) {
12933 		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12934 			oldctxt->myDoc, cur);
12935 	    }
12936 #endif /* LIBXML_VALID_ENABLED */
12937 	    cur->parent = NULL;
12938 	    cur = cur->next;
12939 	}
12940 	ctxt->myDoc->children->children = NULL;
12941     }
12942     if (ctxt->myDoc != NULL) {
12943 	xmlFreeNode(ctxt->myDoc->children);
12944         ctxt->myDoc->children = content;
12945         ctxt->myDoc->last = last;
12946     }
12947 
12948     /*
12949      * Record in the parent context the number of entities replacement
12950      * done when parsing that reference.
12951      */
12952     if (oldctxt != NULL)
12953         oldctxt->nbentities += ctxt->nbentities;
12954 
12955     /*
12956      * Also record the last error if any
12957      */
12958     if (ctxt->lastError.code != XML_ERR_OK)
12959         xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12960 
12961     ctxt->sax = oldsax;
12962     ctxt->dict = NULL;
12963     ctxt->attsDefault = NULL;
12964     ctxt->attsSpecial = NULL;
12965     xmlFreeParserCtxt(ctxt);
12966     if (newDoc != NULL) {
12967 	xmlFreeDoc(newDoc);
12968     }
12969 
12970     return(ret);
12971 }
12972 
12973 /**
12974  * xmlParseInNodeContext:
12975  * @node:  the context node
12976  * @data:  the input string
12977  * @datalen:  the input string length in bytes
12978  * @options:  a combination of xmlParserOption
12979  * @lst:  the return value for the set of parsed nodes
12980  *
12981  * Parse a well-balanced chunk of an XML document
12982  * within the context (DTD, namespaces, etc ...) of the given node.
12983  *
12984  * The allowed sequence for the data is a Well Balanced Chunk defined by
12985  * the content production in the XML grammar:
12986  *
12987  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12988  *
12989  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12990  * error code otherwise
12991  */
12992 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12993 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12994                       int options, xmlNodePtr *lst) {
12995 #ifdef SAX2
12996     xmlParserCtxtPtr ctxt;
12997     xmlDocPtr doc = NULL;
12998     xmlNodePtr fake, cur;
12999     int nsnr = 0;
13000 
13001     xmlParserErrors ret = XML_ERR_OK;
13002 
13003     /*
13004      * check all input parameters, grab the document
13005      */
13006     if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13007         return(XML_ERR_INTERNAL_ERROR);
13008     switch (node->type) {
13009         case XML_ELEMENT_NODE:
13010         case XML_ATTRIBUTE_NODE:
13011         case XML_TEXT_NODE:
13012         case XML_CDATA_SECTION_NODE:
13013         case XML_ENTITY_REF_NODE:
13014         case XML_PI_NODE:
13015         case XML_COMMENT_NODE:
13016         case XML_DOCUMENT_NODE:
13017         case XML_HTML_DOCUMENT_NODE:
13018 	    break;
13019 	default:
13020 	    return(XML_ERR_INTERNAL_ERROR);
13021 
13022     }
13023     while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13024            (node->type != XML_DOCUMENT_NODE) &&
13025 	   (node->type != XML_HTML_DOCUMENT_NODE))
13026 	node = node->parent;
13027     if (node == NULL)
13028 	return(XML_ERR_INTERNAL_ERROR);
13029     if (node->type == XML_ELEMENT_NODE)
13030 	doc = node->doc;
13031     else
13032         doc = (xmlDocPtr) node;
13033     if (doc == NULL)
13034 	return(XML_ERR_INTERNAL_ERROR);
13035 
13036     /*
13037      * allocate a context and set-up everything not related to the
13038      * node position in the tree
13039      */
13040     if (doc->type == XML_DOCUMENT_NODE)
13041 	ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13042 #ifdef LIBXML_HTML_ENABLED
13043     else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13044 	ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13045         /*
13046          * When parsing in context, it makes no sense to add implied
13047          * elements like html/body/etc...
13048          */
13049         options |= HTML_PARSE_NOIMPLIED;
13050     }
13051 #endif
13052     else
13053         return(XML_ERR_INTERNAL_ERROR);
13054 
13055     if (ctxt == NULL)
13056         return(XML_ERR_NO_MEMORY);
13057 
13058     /*
13059      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13060      * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13061      * we must wait until the last moment to free the original one.
13062      */
13063     if (doc->dict != NULL) {
13064         if (ctxt->dict != NULL)
13065 	    xmlDictFree(ctxt->dict);
13066 	ctxt->dict = doc->dict;
13067     } else
13068         options |= XML_PARSE_NODICT;
13069 
13070     if (doc->encoding != NULL) {
13071         xmlCharEncodingHandlerPtr hdlr;
13072 
13073         if (ctxt->encoding != NULL)
13074 	    xmlFree((xmlChar *) ctxt->encoding);
13075         ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13076 
13077         hdlr = xmlFindCharEncodingHandler(doc->encoding);
13078         if (hdlr != NULL) {
13079             xmlSwitchToEncoding(ctxt, hdlr);
13080 	} else {
13081             return(XML_ERR_UNSUPPORTED_ENCODING);
13082         }
13083     }
13084 
13085     xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13086     xmlDetectSAX2(ctxt);
13087     ctxt->myDoc = doc;
13088 
13089     fake = xmlNewComment(NULL);
13090     if (fake == NULL) {
13091         xmlFreeParserCtxt(ctxt);
13092 	return(XML_ERR_NO_MEMORY);
13093     }
13094     xmlAddChild(node, fake);
13095 
13096     if (node->type == XML_ELEMENT_NODE) {
13097 	nodePush(ctxt, node);
13098 	/*
13099 	 * initialize the SAX2 namespaces stack
13100 	 */
13101 	cur = node;
13102 	while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13103 	    xmlNsPtr ns = cur->nsDef;
13104 	    const xmlChar *iprefix, *ihref;
13105 
13106 	    while (ns != NULL) {
13107 		if (ctxt->dict) {
13108 		    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13109 		    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13110 		} else {
13111 		    iprefix = ns->prefix;
13112 		    ihref = ns->href;
13113 		}
13114 
13115 	        if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13116 		    nsPush(ctxt, iprefix, ihref);
13117 		    nsnr++;
13118 		}
13119 		ns = ns->next;
13120 	    }
13121 	    cur = cur->parent;
13122 	}
13123 	ctxt->instate = XML_PARSER_CONTENT;
13124     }
13125 
13126     if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13127 	/*
13128 	 * ID/IDREF registration will be done in xmlValidateElement below
13129 	 */
13130 	ctxt->loadsubset |= XML_SKIP_IDS;
13131     }
13132 
13133 #ifdef LIBXML_HTML_ENABLED
13134     if (doc->type == XML_HTML_DOCUMENT_NODE)
13135         __htmlParseContent(ctxt);
13136     else
13137 #endif
13138 	xmlParseContent(ctxt);
13139 
13140     nsPop(ctxt, nsnr);
13141     if ((RAW == '<') && (NXT(1) == '/')) {
13142 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13143     } else if (RAW != 0) {
13144 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13145     }
13146     if ((ctxt->node != NULL) && (ctxt->node != node)) {
13147 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13148 	ctxt->wellFormed = 0;
13149     }
13150 
13151     if (!ctxt->wellFormed) {
13152         if (ctxt->errNo == 0)
13153 	    ret = XML_ERR_INTERNAL_ERROR;
13154 	else
13155 	    ret = (xmlParserErrors)ctxt->errNo;
13156     } else {
13157         ret = XML_ERR_OK;
13158     }
13159 
13160     /*
13161      * Return the newly created nodeset after unlinking it from
13162      * the pseudo sibling.
13163      */
13164 
13165     cur = fake->next;
13166     fake->next = NULL;
13167     node->last = fake;
13168 
13169     if (cur != NULL) {
13170 	cur->prev = NULL;
13171     }
13172 
13173     *lst = cur;
13174 
13175     while (cur != NULL) {
13176 	cur->parent = NULL;
13177 	cur = cur->next;
13178     }
13179 
13180     xmlUnlinkNode(fake);
13181     xmlFreeNode(fake);
13182 
13183 
13184     if (ret != XML_ERR_OK) {
13185         xmlFreeNodeList(*lst);
13186 	*lst = NULL;
13187     }
13188 
13189     if (doc->dict != NULL)
13190         ctxt->dict = NULL;
13191     xmlFreeParserCtxt(ctxt);
13192 
13193     return(ret);
13194 #else /* !SAX2 */
13195     return(XML_ERR_INTERNAL_ERROR);
13196 #endif
13197 }
13198 
13199 #ifdef LIBXML_SAX1_ENABLED
13200 /**
13201  * xmlParseBalancedChunkMemoryRecover:
13202  * @doc:  the document the chunk pertains to
13203  * @sax:  the SAX handler bloc (possibly NULL)
13204  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13205  * @depth:  Used for loop detection, use 0
13206  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13207  * @lst:  the return value for the set of parsed nodes
13208  * @recover: return nodes even if the data is broken (use 0)
13209  *
13210  *
13211  * Parse a well-balanced chunk of an XML document
13212  * called by the parser
13213  * The allowed sequence for the Well Balanced Chunk is the one defined by
13214  * the content production in the XML grammar:
13215  *
13216  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13217  *
13218  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13219  *    the parser error code otherwise
13220  *
13221  * In case recover is set to 1, the nodelist will not be empty even if
13222  * the parsed chunk is not well balanced, assuming the parsing succeeded to
13223  * some extent.
13224  */
13225 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13226 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13227      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13228      int recover) {
13229     xmlParserCtxtPtr ctxt;
13230     xmlDocPtr newDoc;
13231     xmlSAXHandlerPtr oldsax = NULL;
13232     xmlNodePtr content, newRoot;
13233     int size;
13234     int ret = 0;
13235 
13236     if (depth > 40) {
13237 	return(XML_ERR_ENTITY_LOOP);
13238     }
13239 
13240 
13241     if (lst != NULL)
13242         *lst = NULL;
13243     if (string == NULL)
13244         return(-1);
13245 
13246     size = xmlStrlen(string);
13247 
13248     ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13249     if (ctxt == NULL) return(-1);
13250     ctxt->userData = ctxt;
13251     if (sax != NULL) {
13252 	oldsax = ctxt->sax;
13253         ctxt->sax = sax;
13254 	if (user_data != NULL)
13255 	    ctxt->userData = user_data;
13256     }
13257     newDoc = xmlNewDoc(BAD_CAST "1.0");
13258     if (newDoc == NULL) {
13259 	xmlFreeParserCtxt(ctxt);
13260 	return(-1);
13261     }
13262     newDoc->properties = XML_DOC_INTERNAL;
13263     if ((doc != NULL) && (doc->dict != NULL)) {
13264         xmlDictFree(ctxt->dict);
13265 	ctxt->dict = doc->dict;
13266 	xmlDictReference(ctxt->dict);
13267 	ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13268 	ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13269 	ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13270 	ctxt->dictNames = 1;
13271     } else {
13272 	xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13273     }
13274     if (doc != NULL) {
13275 	newDoc->intSubset = doc->intSubset;
13276 	newDoc->extSubset = doc->extSubset;
13277     }
13278     newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13279     if (newRoot == NULL) {
13280 	if (sax != NULL)
13281 	    ctxt->sax = oldsax;
13282 	xmlFreeParserCtxt(ctxt);
13283 	newDoc->intSubset = NULL;
13284 	newDoc->extSubset = NULL;
13285         xmlFreeDoc(newDoc);
13286 	return(-1);
13287     }
13288     xmlAddChild((xmlNodePtr) newDoc, newRoot);
13289     nodePush(ctxt, newRoot);
13290     if (doc == NULL) {
13291 	ctxt->myDoc = newDoc;
13292     } else {
13293 	ctxt->myDoc = newDoc;
13294 	newDoc->children->doc = doc;
13295 	/* Ensure that doc has XML spec namespace */
13296 	xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13297 	newDoc->oldNs = doc->oldNs;
13298     }
13299     ctxt->instate = XML_PARSER_CONTENT;
13300     ctxt->depth = depth;
13301 
13302     /*
13303      * Doing validity checking on chunk doesn't make sense
13304      */
13305     ctxt->validate = 0;
13306     ctxt->loadsubset = 0;
13307     xmlDetectSAX2(ctxt);
13308 
13309     if ( doc != NULL ){
13310         content = doc->children;
13311         doc->children = NULL;
13312         xmlParseContent(ctxt);
13313         doc->children = content;
13314     }
13315     else {
13316         xmlParseContent(ctxt);
13317     }
13318     if ((RAW == '<') && (NXT(1) == '/')) {
13319 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13320     } else if (RAW != 0) {
13321 	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13322     }
13323     if (ctxt->node != newDoc->children) {
13324 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13325     }
13326 
13327     if (!ctxt->wellFormed) {
13328         if (ctxt->errNo == 0)
13329 	    ret = 1;
13330 	else
13331 	    ret = ctxt->errNo;
13332     } else {
13333       ret = 0;
13334     }
13335 
13336     if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13337 	xmlNodePtr cur;
13338 
13339 	/*
13340 	 * Return the newly created nodeset after unlinking it from
13341 	 * they pseudo parent.
13342 	 */
13343 	cur = newDoc->children->children;
13344 	*lst = cur;
13345 	while (cur != NULL) {
13346 	    xmlSetTreeDoc(cur, doc);
13347 	    cur->parent = NULL;
13348 	    cur = cur->next;
13349 	}
13350 	newDoc->children->children = NULL;
13351     }
13352 
13353     if (sax != NULL)
13354 	ctxt->sax = oldsax;
13355     xmlFreeParserCtxt(ctxt);
13356     newDoc->intSubset = NULL;
13357     newDoc->extSubset = NULL;
13358     newDoc->oldNs = NULL;
13359     xmlFreeDoc(newDoc);
13360 
13361     return(ret);
13362 }
13363 
13364 /**
13365  * xmlSAXParseEntity:
13366  * @sax:  the SAX handler block
13367  * @filename:  the filename
13368  *
13369  * parse an XML external entity out of context and build a tree.
13370  * It use the given SAX function block to handle the parsing callback.
13371  * If sax is NULL, fallback to the default DOM tree building routines.
13372  *
13373  * [78] extParsedEnt ::= TextDecl? content
13374  *
13375  * This correspond to a "Well Balanced" chunk
13376  *
13377  * Returns the resulting document tree
13378  */
13379 
13380 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13381 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13382     xmlDocPtr ret;
13383     xmlParserCtxtPtr ctxt;
13384 
13385     ctxt = xmlCreateFileParserCtxt(filename);
13386     if (ctxt == NULL) {
13387 	return(NULL);
13388     }
13389     if (sax != NULL) {
13390 	if (ctxt->sax != NULL)
13391 	    xmlFree(ctxt->sax);
13392         ctxt->sax = sax;
13393         ctxt->userData = NULL;
13394     }
13395 
13396     xmlParseExtParsedEnt(ctxt);
13397 
13398     if (ctxt->wellFormed)
13399 	ret = ctxt->myDoc;
13400     else {
13401         ret = NULL;
13402         xmlFreeDoc(ctxt->myDoc);
13403         ctxt->myDoc = NULL;
13404     }
13405     if (sax != NULL)
13406         ctxt->sax = NULL;
13407     xmlFreeParserCtxt(ctxt);
13408 
13409     return(ret);
13410 }
13411 
13412 /**
13413  * xmlParseEntity:
13414  * @filename:  the filename
13415  *
13416  * parse an XML external entity out of context and build a tree.
13417  *
13418  * [78] extParsedEnt ::= TextDecl? content
13419  *
13420  * This correspond to a "Well Balanced" chunk
13421  *
13422  * Returns the resulting document tree
13423  */
13424 
13425 xmlDocPtr
xmlParseEntity(const char * filename)13426 xmlParseEntity(const char *filename) {
13427     return(xmlSAXParseEntity(NULL, filename));
13428 }
13429 #endif /* LIBXML_SAX1_ENABLED */
13430 
13431 /**
13432  * xmlCreateEntityParserCtxtInternal:
13433  * @URL:  the entity URL
13434  * @ID:  the entity PUBLIC ID
13435  * @base:  a possible base for the target URI
13436  * @pctx:  parser context used to set options on new context
13437  *
13438  * Create a parser context for an external entity
13439  * Automatic support for ZLIB/Compress compressed document is provided
13440  * by default if found at compile-time.
13441  *
13442  * Returns the new parser context or NULL
13443  */
13444 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13445 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13446 	                  const xmlChar *base, xmlParserCtxtPtr pctx) {
13447     xmlParserCtxtPtr ctxt;
13448     xmlParserInputPtr inputStream;
13449     char *directory = NULL;
13450     xmlChar *uri;
13451 
13452     ctxt = xmlNewParserCtxt();
13453     if (ctxt == NULL) {
13454 	return(NULL);
13455     }
13456 
13457     if (pctx != NULL) {
13458         ctxt->options = pctx->options;
13459         ctxt->_private = pctx->_private;
13460     }
13461 
13462     uri = xmlBuildURI(URL, base);
13463 
13464     if (uri == NULL) {
13465 	inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13466 	if (inputStream == NULL) {
13467 	    xmlFreeParserCtxt(ctxt);
13468 	    return(NULL);
13469 	}
13470 
13471 	inputPush(ctxt, inputStream);
13472 
13473 	if ((ctxt->directory == NULL) && (directory == NULL))
13474 	    directory = xmlParserGetDirectory((char *)URL);
13475 	if ((ctxt->directory == NULL) && (directory != NULL))
13476 	    ctxt->directory = directory;
13477     } else {
13478 	inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13479 	if (inputStream == NULL) {
13480 	    xmlFree(uri);
13481 	    xmlFreeParserCtxt(ctxt);
13482 	    return(NULL);
13483 	}
13484 
13485 	inputPush(ctxt, inputStream);
13486 
13487 	if ((ctxt->directory == NULL) && (directory == NULL))
13488 	    directory = xmlParserGetDirectory((char *)uri);
13489 	if ((ctxt->directory == NULL) && (directory != NULL))
13490 	    ctxt->directory = directory;
13491 	xmlFree(uri);
13492     }
13493     return(ctxt);
13494 }
13495 
13496 /**
13497  * xmlCreateEntityParserCtxt:
13498  * @URL:  the entity URL
13499  * @ID:  the entity PUBLIC ID
13500  * @base:  a possible base for the target URI
13501  *
13502  * Create a parser context for an external entity
13503  * Automatic support for ZLIB/Compress compressed document is provided
13504  * by default if found at compile-time.
13505  *
13506  * Returns the new parser context or NULL
13507  */
13508 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13509 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13510 	                  const xmlChar *base) {
13511     return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13512 
13513 }
13514 
13515 /************************************************************************
13516  *									*
13517  *		Front ends when parsing from a file			*
13518  *									*
13519  ************************************************************************/
13520 
13521 /**
13522  * xmlCreateURLParserCtxt:
13523  * @filename:  the filename or URL
13524  * @options:  a combination of xmlParserOption
13525  *
13526  * Create a parser context for a file or URL content.
13527  * Automatic support for ZLIB/Compress compressed document is provided
13528  * by default if found at compile-time and for file accesses
13529  *
13530  * Returns the new parser context or NULL
13531  */
13532 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13533 xmlCreateURLParserCtxt(const char *filename, int options)
13534 {
13535     xmlParserCtxtPtr ctxt;
13536     xmlParserInputPtr inputStream;
13537     char *directory = NULL;
13538 
13539     ctxt = xmlNewParserCtxt();
13540     if (ctxt == NULL) {
13541 	xmlErrMemory(NULL, "cannot allocate parser context");
13542 	return(NULL);
13543     }
13544 
13545     if (options)
13546 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13547     ctxt->linenumbers = 1;
13548 
13549     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13550     if (inputStream == NULL) {
13551 	xmlFreeParserCtxt(ctxt);
13552 	return(NULL);
13553     }
13554 
13555     inputPush(ctxt, inputStream);
13556     if ((ctxt->directory == NULL) && (directory == NULL))
13557         directory = xmlParserGetDirectory(filename);
13558     if ((ctxt->directory == NULL) && (directory != NULL))
13559         ctxt->directory = directory;
13560 
13561     return(ctxt);
13562 }
13563 
13564 /**
13565  * xmlCreateFileParserCtxt:
13566  * @filename:  the filename
13567  *
13568  * Create a parser context for a file content.
13569  * Automatic support for ZLIB/Compress compressed document is provided
13570  * by default if found at compile-time.
13571  *
13572  * Returns the new parser context or NULL
13573  */
13574 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13575 xmlCreateFileParserCtxt(const char *filename)
13576 {
13577     return(xmlCreateURLParserCtxt(filename, 0));
13578 }
13579 
13580 #ifdef LIBXML_SAX1_ENABLED
13581 /**
13582  * xmlSAXParseFileWithData:
13583  * @sax:  the SAX handler block
13584  * @filename:  the filename
13585  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13586  *             documents
13587  * @data:  the userdata
13588  *
13589  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13590  * compressed document is provided by default if found at compile-time.
13591  * It use the given SAX function block to handle the parsing callback.
13592  * If sax is NULL, fallback to the default DOM tree building routines.
13593  *
13594  * User data (void *) is stored within the parser context in the
13595  * context's _private member, so it is available nearly everywhere in libxml
13596  *
13597  * Returns the resulting document tree
13598  */
13599 
13600 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13601 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13602                         int recovery, void *data) {
13603     xmlDocPtr ret;
13604     xmlParserCtxtPtr ctxt;
13605 
13606     xmlInitParser();
13607 
13608     ctxt = xmlCreateFileParserCtxt(filename);
13609     if (ctxt == NULL) {
13610 	return(NULL);
13611     }
13612     if (sax != NULL) {
13613 	if (ctxt->sax != NULL)
13614 	    xmlFree(ctxt->sax);
13615         ctxt->sax = sax;
13616     }
13617     xmlDetectSAX2(ctxt);
13618     if (data!=NULL) {
13619 	ctxt->_private = data;
13620     }
13621 
13622     if (ctxt->directory == NULL)
13623         ctxt->directory = xmlParserGetDirectory(filename);
13624 
13625     ctxt->recovery = recovery;
13626 
13627     xmlParseDocument(ctxt);
13628 
13629     if ((ctxt->wellFormed) || recovery) {
13630         ret = ctxt->myDoc;
13631 	if (ret != NULL) {
13632 	    if (ctxt->input->buf->compressed > 0)
13633 		ret->compression = 9;
13634 	    else
13635 		ret->compression = ctxt->input->buf->compressed;
13636 	}
13637     }
13638     else {
13639        ret = NULL;
13640        xmlFreeDoc(ctxt->myDoc);
13641        ctxt->myDoc = NULL;
13642     }
13643     if (sax != NULL)
13644         ctxt->sax = NULL;
13645     xmlFreeParserCtxt(ctxt);
13646 
13647     return(ret);
13648 }
13649 
13650 /**
13651  * xmlSAXParseFile:
13652  * @sax:  the SAX handler block
13653  * @filename:  the filename
13654  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13655  *             documents
13656  *
13657  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13658  * compressed document is provided by default if found at compile-time.
13659  * It use the given SAX function block to handle the parsing callback.
13660  * If sax is NULL, fallback to the default DOM tree building routines.
13661  *
13662  * Returns the resulting document tree
13663  */
13664 
13665 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13666 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13667                           int recovery) {
13668     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13669 }
13670 
13671 /**
13672  * xmlRecoverDoc:
13673  * @cur:  a pointer to an array of xmlChar
13674  *
13675  * parse an XML in-memory document and build a tree.
13676  * In the case the document is not Well Formed, a attempt to build a
13677  * tree is tried anyway
13678  *
13679  * Returns the resulting document tree or NULL in case of failure
13680  */
13681 
13682 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13683 xmlRecoverDoc(const xmlChar *cur) {
13684     return(xmlSAXParseDoc(NULL, cur, 1));
13685 }
13686 
13687 /**
13688  * xmlParseFile:
13689  * @filename:  the filename
13690  *
13691  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13692  * compressed document is provided by default if found at compile-time.
13693  *
13694  * Returns the resulting document tree if the file was wellformed,
13695  * NULL otherwise.
13696  */
13697 
13698 xmlDocPtr
xmlParseFile(const char * filename)13699 xmlParseFile(const char *filename) {
13700     return(xmlSAXParseFile(NULL, filename, 0));
13701 }
13702 
13703 /**
13704  * xmlRecoverFile:
13705  * @filename:  the filename
13706  *
13707  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13708  * compressed document is provided by default if found at compile-time.
13709  * In the case the document is not Well Formed, it attempts to build
13710  * a tree anyway
13711  *
13712  * Returns the resulting document tree or NULL in case of failure
13713  */
13714 
13715 xmlDocPtr
xmlRecoverFile(const char * filename)13716 xmlRecoverFile(const char *filename) {
13717     return(xmlSAXParseFile(NULL, filename, 1));
13718 }
13719 
13720 
13721 /**
13722  * xmlSetupParserForBuffer:
13723  * @ctxt:  an XML parser context
13724  * @buffer:  a xmlChar * buffer
13725  * @filename:  a file name
13726  *
13727  * Setup the parser context to parse a new buffer; Clears any prior
13728  * contents from the parser context. The buffer parameter must not be
13729  * NULL, but the filename parameter can be
13730  */
13731 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13732 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13733                              const char* filename)
13734 {
13735     xmlParserInputPtr input;
13736 
13737     if ((ctxt == NULL) || (buffer == NULL))
13738         return;
13739 
13740     input = xmlNewInputStream(ctxt);
13741     if (input == NULL) {
13742         xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13743         xmlClearParserCtxt(ctxt);
13744         return;
13745     }
13746 
13747     xmlClearParserCtxt(ctxt);
13748     if (filename != NULL)
13749         input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13750     input->base = buffer;
13751     input->cur = buffer;
13752     input->end = &buffer[xmlStrlen(buffer)];
13753     inputPush(ctxt, input);
13754 }
13755 
13756 /**
13757  * xmlSAXUserParseFile:
13758  * @sax:  a SAX handler
13759  * @user_data:  The user data returned on SAX callbacks
13760  * @filename:  a file name
13761  *
13762  * parse an XML file and call the given SAX handler routines.
13763  * Automatic support for ZLIB/Compress compressed document is provided
13764  *
13765  * Returns 0 in case of success or a error number otherwise
13766  */
13767 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13768 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13769                     const char *filename) {
13770     int ret = 0;
13771     xmlParserCtxtPtr ctxt;
13772 
13773     ctxt = xmlCreateFileParserCtxt(filename);
13774     if (ctxt == NULL) return -1;
13775     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13776 	xmlFree(ctxt->sax);
13777     ctxt->sax = sax;
13778     xmlDetectSAX2(ctxt);
13779 
13780     if (user_data != NULL)
13781 	ctxt->userData = user_data;
13782 
13783     xmlParseDocument(ctxt);
13784 
13785     if (ctxt->wellFormed)
13786 	ret = 0;
13787     else {
13788         if (ctxt->errNo != 0)
13789 	    ret = ctxt->errNo;
13790 	else
13791 	    ret = -1;
13792     }
13793     if (sax != NULL)
13794 	ctxt->sax = NULL;
13795     if (ctxt->myDoc != NULL) {
13796         xmlFreeDoc(ctxt->myDoc);
13797 	ctxt->myDoc = NULL;
13798     }
13799     xmlFreeParserCtxt(ctxt);
13800 
13801     return ret;
13802 }
13803 #endif /* LIBXML_SAX1_ENABLED */
13804 
13805 /************************************************************************
13806  *									*
13807  * 		Front ends when parsing from memory			*
13808  *									*
13809  ************************************************************************/
13810 
13811 /**
13812  * xmlCreateMemoryParserCtxt:
13813  * @buffer:  a pointer to a char array
13814  * @size:  the size of the array
13815  *
13816  * Create a parser context for an XML in-memory document.
13817  *
13818  * Returns the new parser context or NULL
13819  */
13820 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13821 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13822     xmlParserCtxtPtr ctxt;
13823     xmlParserInputPtr input;
13824     xmlParserInputBufferPtr buf;
13825 
13826     if (buffer == NULL)
13827 	return(NULL);
13828     if (size <= 0)
13829 	return(NULL);
13830 
13831     ctxt = xmlNewParserCtxt();
13832     if (ctxt == NULL)
13833 	return(NULL);
13834 
13835     /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13836     buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13837     if (buf == NULL) {
13838 	xmlFreeParserCtxt(ctxt);
13839 	return(NULL);
13840     }
13841 
13842     input = xmlNewInputStream(ctxt);
13843     if (input == NULL) {
13844 	xmlFreeParserInputBuffer(buf);
13845 	xmlFreeParserCtxt(ctxt);
13846 	return(NULL);
13847     }
13848 
13849     input->filename = NULL;
13850     input->buf = buf;
13851     input->base = input->buf->buffer->content;
13852     input->cur = input->buf->buffer->content;
13853     input->end = &input->buf->buffer->content[input->buf->buffer->use];
13854 
13855     inputPush(ctxt, input);
13856     return(ctxt);
13857 }
13858 
13859 #ifdef LIBXML_SAX1_ENABLED
13860 /**
13861  * xmlSAXParseMemoryWithData:
13862  * @sax:  the SAX handler block
13863  * @buffer:  an pointer to a char array
13864  * @size:  the size of the array
13865  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13866  *             documents
13867  * @data:  the userdata
13868  *
13869  * parse an XML in-memory block and use the given SAX function block
13870  * to handle the parsing callback. If sax is NULL, fallback to the default
13871  * DOM tree building routines.
13872  *
13873  * User data (void *) is stored within the parser context in the
13874  * context's _private member, so it is available nearly everywhere in libxml
13875  *
13876  * Returns the resulting document tree
13877  */
13878 
13879 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13880 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13881 	          int size, int recovery, void *data) {
13882     xmlDocPtr ret;
13883     xmlParserCtxtPtr ctxt;
13884 
13885     xmlInitParser();
13886 
13887     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13888     if (ctxt == NULL) return(NULL);
13889     if (sax != NULL) {
13890 	if (ctxt->sax != NULL)
13891 	    xmlFree(ctxt->sax);
13892         ctxt->sax = sax;
13893     }
13894     xmlDetectSAX2(ctxt);
13895     if (data!=NULL) {
13896 	ctxt->_private=data;
13897     }
13898 
13899     ctxt->recovery = recovery;
13900 
13901     xmlParseDocument(ctxt);
13902 
13903     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13904     else {
13905        ret = NULL;
13906        xmlFreeDoc(ctxt->myDoc);
13907        ctxt->myDoc = NULL;
13908     }
13909     if (sax != NULL)
13910 	ctxt->sax = NULL;
13911     xmlFreeParserCtxt(ctxt);
13912 
13913     return(ret);
13914 }
13915 
13916 /**
13917  * xmlSAXParseMemory:
13918  * @sax:  the SAX handler block
13919  * @buffer:  an pointer to a char array
13920  * @size:  the size of the array
13921  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13922  *             documents
13923  *
13924  * parse an XML in-memory block and use the given SAX function block
13925  * to handle the parsing callback. If sax is NULL, fallback to the default
13926  * DOM tree building routines.
13927  *
13928  * Returns the resulting document tree
13929  */
13930 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13931 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13932 	          int size, int recovery) {
13933     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13934 }
13935 
13936 /**
13937  * xmlParseMemory:
13938  * @buffer:  an pointer to a char array
13939  * @size:  the size of the array
13940  *
13941  * parse an XML in-memory block and build a tree.
13942  *
13943  * Returns the resulting document tree
13944  */
13945 
xmlParseMemory(const char * buffer,int size)13946 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13947    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13948 }
13949 
13950 /**
13951  * xmlRecoverMemory:
13952  * @buffer:  an pointer to a char array
13953  * @size:  the size of the array
13954  *
13955  * parse an XML in-memory block and build a tree.
13956  * In the case the document is not Well Formed, an attempt to
13957  * build a tree is tried anyway
13958  *
13959  * Returns the resulting document tree or NULL in case of error
13960  */
13961 
xmlRecoverMemory(const char * buffer,int size)13962 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13963    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13964 }
13965 
13966 /**
13967  * xmlSAXUserParseMemory:
13968  * @sax:  a SAX handler
13969  * @user_data:  The user data returned on SAX callbacks
13970  * @buffer:  an in-memory XML document input
13971  * @size:  the length of the XML document in bytes
13972  *
13973  * A better SAX parsing routine.
13974  * parse an XML in-memory buffer and call the given SAX handler routines.
13975  *
13976  * Returns 0 in case of success or a error number otherwise
13977  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13978 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13979 			  const char *buffer, int size) {
13980     int ret = 0;
13981     xmlParserCtxtPtr ctxt;
13982 
13983     xmlInitParser();
13984 
13985     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13986     if (ctxt == NULL) return -1;
13987     if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13988         xmlFree(ctxt->sax);
13989     ctxt->sax = sax;
13990     xmlDetectSAX2(ctxt);
13991 
13992     if (user_data != NULL)
13993 	ctxt->userData = user_data;
13994 
13995     xmlParseDocument(ctxt);
13996 
13997     if (ctxt->wellFormed)
13998 	ret = 0;
13999     else {
14000         if (ctxt->errNo != 0)
14001 	    ret = ctxt->errNo;
14002 	else
14003 	    ret = -1;
14004     }
14005     if (sax != NULL)
14006         ctxt->sax = NULL;
14007     if (ctxt->myDoc != NULL) {
14008         xmlFreeDoc(ctxt->myDoc);
14009 	ctxt->myDoc = NULL;
14010     }
14011     xmlFreeParserCtxt(ctxt);
14012 
14013     return ret;
14014 }
14015 #endif /* LIBXML_SAX1_ENABLED */
14016 
14017 /**
14018  * xmlCreateDocParserCtxt:
14019  * @cur:  a pointer to an array of xmlChar
14020  *
14021  * Creates a parser context for an XML in-memory document.
14022  *
14023  * Returns the new parser context or NULL
14024  */
14025 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14026 xmlCreateDocParserCtxt(const xmlChar *cur) {
14027     int len;
14028 
14029     if (cur == NULL)
14030 	return(NULL);
14031     len = xmlStrlen(cur);
14032     return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14033 }
14034 
14035 #ifdef LIBXML_SAX1_ENABLED
14036 /**
14037  * xmlSAXParseDoc:
14038  * @sax:  the SAX handler block
14039  * @cur:  a pointer to an array of xmlChar
14040  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14041  *             documents
14042  *
14043  * parse an XML in-memory document and build a tree.
14044  * It use the given SAX function block to handle the parsing callback.
14045  * If sax is NULL, fallback to the default DOM tree building routines.
14046  *
14047  * Returns the resulting document tree
14048  */
14049 
14050 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14051 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14052     xmlDocPtr ret;
14053     xmlParserCtxtPtr ctxt;
14054     xmlSAXHandlerPtr oldsax = NULL;
14055 
14056     if (cur == NULL) return(NULL);
14057 
14058 
14059     ctxt = xmlCreateDocParserCtxt(cur);
14060     if (ctxt == NULL) return(NULL);
14061     if (sax != NULL) {
14062         oldsax = ctxt->sax;
14063         ctxt->sax = sax;
14064         ctxt->userData = NULL;
14065     }
14066     xmlDetectSAX2(ctxt);
14067 
14068     xmlParseDocument(ctxt);
14069     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14070     else {
14071        ret = NULL;
14072        xmlFreeDoc(ctxt->myDoc);
14073        ctxt->myDoc = NULL;
14074     }
14075     if (sax != NULL)
14076 	ctxt->sax = oldsax;
14077     xmlFreeParserCtxt(ctxt);
14078 
14079     return(ret);
14080 }
14081 
14082 /**
14083  * xmlParseDoc:
14084  * @cur:  a pointer to an array of xmlChar
14085  *
14086  * parse an XML in-memory document and build a tree.
14087  *
14088  * Returns the resulting document tree
14089  */
14090 
14091 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14092 xmlParseDoc(const xmlChar *cur) {
14093     return(xmlSAXParseDoc(NULL, cur, 0));
14094 }
14095 #endif /* LIBXML_SAX1_ENABLED */
14096 
14097 #ifdef LIBXML_LEGACY_ENABLED
14098 /************************************************************************
14099  *									*
14100  * 	Specific function to keep track of entities references		*
14101  * 	and used by the XSLT debugger					*
14102  *									*
14103  ************************************************************************/
14104 
14105 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14106 
14107 /**
14108  * xmlAddEntityReference:
14109  * @ent : A valid entity
14110  * @firstNode : A valid first node for children of entity
14111  * @lastNode : A valid last node of children entity
14112  *
14113  * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14114  */
14115 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14116 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14117                       xmlNodePtr lastNode)
14118 {
14119     if (xmlEntityRefFunc != NULL) {
14120         (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14121     }
14122 }
14123 
14124 
14125 /**
14126  * xmlSetEntityReferenceFunc:
14127  * @func: A valid function
14128  *
14129  * Set the function to call call back when a xml reference has been made
14130  */
14131 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14132 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14133 {
14134     xmlEntityRefFunc = func;
14135 }
14136 #endif /* LIBXML_LEGACY_ENABLED */
14137 
14138 /************************************************************************
14139  *									*
14140  * 				Miscellaneous				*
14141  *									*
14142  ************************************************************************/
14143 
14144 #ifdef LIBXML_XPATH_ENABLED
14145 #include <libxml/xpath.h>
14146 #endif
14147 
14148 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14149 static int xmlParserInitialized = 0;
14150 
14151 /**
14152  * xmlInitParser:
14153  *
14154  * Initialization function for the XML parser.
14155  * This is not reentrant. Call once before processing in case of
14156  * use in multithreaded programs.
14157  */
14158 
14159 void
xmlInitParser(void)14160 xmlInitParser(void) {
14161     if (xmlParserInitialized != 0)
14162 	return;
14163 
14164 #ifdef LIBXML_THREAD_ENABLED
14165     __xmlGlobalInitMutexLock();
14166     if (xmlParserInitialized == 0) {
14167 #endif
14168 	xmlInitGlobals();
14169 	xmlInitThreads();
14170 	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14171 	    (xmlGenericError == NULL))
14172 	    initGenericErrorDefaultFunc(NULL);
14173 	xmlInitMemory();
14174 	xmlInitCharEncodingHandlers();
14175 	xmlDefaultSAXHandlerInit();
14176 	xmlRegisterDefaultInputCallbacks();
14177 #ifdef LIBXML_OUTPUT_ENABLED
14178 	xmlRegisterDefaultOutputCallbacks();
14179 #endif /* LIBXML_OUTPUT_ENABLED */
14180 #ifdef LIBXML_HTML_ENABLED
14181 	htmlInitAutoClose();
14182 	htmlDefaultSAXHandlerInit();
14183 #endif
14184 #ifdef LIBXML_XPATH_ENABLED
14185 	xmlXPathInit();
14186 #endif
14187 	xmlParserInitialized = 1;
14188 #ifdef LIBXML_THREAD_ENABLED
14189     }
14190     __xmlGlobalInitMutexUnlock();
14191 #endif
14192 }
14193 
14194 /**
14195  * xmlCleanupParser:
14196  *
14197  * This function name is somewhat misleading. It does not clean up
14198  * parser state, it cleans up memory allocated by the library itself.
14199  * It is a cleanup function for the XML library. It tries to reclaim all
14200  * related global memory allocated for the library processing.
14201  * It doesn't deallocate any document related memory. One should
14202  * call xmlCleanupParser() only when the process has finished using
14203  * the library and all XML/HTML documents built with it.
14204  * See also xmlInitParser() which has the opposite function of preparing
14205  * the library for operations.
14206  *
14207  * WARNING: if your application is multithreaded or has plugin support
14208  *          calling this may crash the application if another thread or
14209  *          a plugin is still using libxml2. It's sometimes very hard to
14210  *          guess if libxml2 is in use in the application, some libraries
14211  *          or plugins may use it without notice. In case of doubt abstain
14212  *          from calling this function or do it just before calling exit()
14213  *          to avoid leak reports from valgrind !
14214  */
14215 
14216 void
xmlCleanupParser(void)14217 xmlCleanupParser(void) {
14218     if (!xmlParserInitialized)
14219 	return;
14220 
14221     xmlCleanupCharEncodingHandlers();
14222 #ifdef LIBXML_CATALOG_ENABLED
14223     xmlCatalogCleanup();
14224 #endif
14225     xmlDictCleanup();
14226     xmlCleanupInputCallbacks();
14227 #ifdef LIBXML_OUTPUT_ENABLED
14228     xmlCleanupOutputCallbacks();
14229 #endif
14230 #ifdef LIBXML_SCHEMAS_ENABLED
14231     xmlSchemaCleanupTypes();
14232     xmlRelaxNGCleanupTypes();
14233 #endif
14234     xmlCleanupGlobals();
14235     xmlResetLastError();
14236     xmlCleanupThreads(); /* must be last if called not from the main thread */
14237     xmlCleanupMemory();
14238     xmlParserInitialized = 0;
14239 }
14240 
14241 /************************************************************************
14242  *									*
14243  *	New set (2.6.0) of simpler and more flexible APIs		*
14244  *									*
14245  ************************************************************************/
14246 
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the body of an if/else.
 * Callers must terminate the invocation with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14258 
14259 /**
14260  * xmlCtxtReset:
14261  * @ctxt: an XML parser context
14262  *
14263  * Reset a parser context
14264  */
14265 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14266 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14267 {
14268     xmlParserInputPtr input;
14269     xmlDictPtr dict;
14270 
14271     if (ctxt == NULL)
14272         return;
14273 
14274     dict = ctxt->dict;
14275 
14276     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14277         xmlFreeInputStream(input);
14278     }
14279     ctxt->inputNr = 0;
14280     ctxt->input = NULL;
14281 
14282     ctxt->spaceNr = 0;
14283     if (ctxt->spaceTab != NULL) {
14284 	ctxt->spaceTab[0] = -1;
14285 	ctxt->space = &ctxt->spaceTab[0];
14286     } else {
14287         ctxt->space = NULL;
14288     }
14289 
14290 
14291     ctxt->nodeNr = 0;
14292     ctxt->node = NULL;
14293 
14294     ctxt->nameNr = 0;
14295     ctxt->name = NULL;
14296 
14297     DICT_FREE(ctxt->version);
14298     ctxt->version = NULL;
14299     DICT_FREE(ctxt->encoding);
14300     ctxt->encoding = NULL;
14301     DICT_FREE(ctxt->directory);
14302     ctxt->directory = NULL;
14303     DICT_FREE(ctxt->extSubURI);
14304     ctxt->extSubURI = NULL;
14305     DICT_FREE(ctxt->extSubSystem);
14306     ctxt->extSubSystem = NULL;
14307     if (ctxt->myDoc != NULL)
14308         xmlFreeDoc(ctxt->myDoc);
14309     ctxt->myDoc = NULL;
14310 
14311     ctxt->standalone = -1;
14312     ctxt->hasExternalSubset = 0;
14313     ctxt->hasPErefs = 0;
14314     ctxt->html = 0;
14315     ctxt->external = 0;
14316     ctxt->instate = XML_PARSER_START;
14317     ctxt->token = 0;
14318 
14319     ctxt->wellFormed = 1;
14320     ctxt->nsWellFormed = 1;
14321     ctxt->disableSAX = 0;
14322     ctxt->valid = 1;
14323 #if 0
14324     ctxt->vctxt.userData = ctxt;
14325     ctxt->vctxt.error = xmlParserValidityError;
14326     ctxt->vctxt.warning = xmlParserValidityWarning;
14327 #endif
14328     ctxt->record_info = 0;
14329     ctxt->nbChars = 0;
14330     ctxt->checkIndex = 0;
14331     ctxt->inSubset = 0;
14332     ctxt->errNo = XML_ERR_OK;
14333     ctxt->depth = 0;
14334     ctxt->charset = XML_CHAR_ENCODING_UTF8;
14335     ctxt->catalogs = NULL;
14336     ctxt->nbentities = 0;
14337     ctxt->sizeentities = 0;
14338     xmlInitNodeInfoSeq(&ctxt->node_seq);
14339 
14340     if (ctxt->attsDefault != NULL) {
14341         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14342         ctxt->attsDefault = NULL;
14343     }
14344     if (ctxt->attsSpecial != NULL) {
14345         xmlHashFree(ctxt->attsSpecial, NULL);
14346         ctxt->attsSpecial = NULL;
14347     }
14348 
14349 #ifdef LIBXML_CATALOG_ENABLED
14350     if (ctxt->catalogs != NULL)
14351 	xmlCatalogFreeLocal(ctxt->catalogs);
14352 #endif
14353     if (ctxt->lastError.code != XML_ERR_OK)
14354         xmlResetError(&ctxt->lastError);
14355 }
14356 
14357 /**
14358  * xmlCtxtResetPush:
14359  * @ctxt: an XML parser context
14360  * @chunk:  a pointer to an array of chars
14361  * @size:  number of chars in the array
14362  * @filename:  an optional file name or URI
14363  * @encoding:  the document encoding, or NULL
14364  *
14365  * Reset a push parser context
14366  *
14367  * Returns 0 in case of success and 1 in case of error
14368  */
14369 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14370 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14371                  int size, const char *filename, const char *encoding)
14372 {
14373     xmlParserInputPtr inputStream;
14374     xmlParserInputBufferPtr buf;
14375     xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14376 
14377     if (ctxt == NULL)
14378         return(1);
14379 
14380     if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14381         enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14382 
14383     buf = xmlAllocParserInputBuffer(enc);
14384     if (buf == NULL)
14385         return(1);
14386 
14387     if (ctxt == NULL) {
14388         xmlFreeParserInputBuffer(buf);
14389         return(1);
14390     }
14391 
14392     xmlCtxtReset(ctxt);
14393 
14394     if (ctxt->pushTab == NULL) {
14395         ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14396 	                                    sizeof(xmlChar *));
14397         if (ctxt->pushTab == NULL) {
14398 	    xmlErrMemory(ctxt, NULL);
14399             xmlFreeParserInputBuffer(buf);
14400             return(1);
14401         }
14402     }
14403 
14404     if (filename == NULL) {
14405         ctxt->directory = NULL;
14406     } else {
14407         ctxt->directory = xmlParserGetDirectory(filename);
14408     }
14409 
14410     inputStream = xmlNewInputStream(ctxt);
14411     if (inputStream == NULL) {
14412         xmlFreeParserInputBuffer(buf);
14413         return(1);
14414     }
14415 
14416     if (filename == NULL)
14417         inputStream->filename = NULL;
14418     else
14419         inputStream->filename = (char *)
14420             xmlCanonicPath((const xmlChar *) filename);
14421     inputStream->buf = buf;
14422     inputStream->base = inputStream->buf->buffer->content;
14423     inputStream->cur = inputStream->buf->buffer->content;
14424     inputStream->end =
14425         &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14426 
14427     inputPush(ctxt, inputStream);
14428 
14429     if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14430         (ctxt->input->buf != NULL)) {
14431         int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14432         int cur = ctxt->input->cur - ctxt->input->base;
14433 
14434         xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14435 
14436         ctxt->input->base = ctxt->input->buf->buffer->content + base;
14437         ctxt->input->cur = ctxt->input->base + cur;
14438         ctxt->input->end =
14439             &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14440                                                use];
14441 #ifdef DEBUG_PUSH
14442         xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14443 #endif
14444     }
14445 
14446     if (encoding != NULL) {
14447         xmlCharEncodingHandlerPtr hdlr;
14448 
14449         if (ctxt->encoding != NULL)
14450 	    xmlFree((xmlChar *) ctxt->encoding);
14451         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14452 
14453         hdlr = xmlFindCharEncodingHandler(encoding);
14454         if (hdlr != NULL) {
14455             xmlSwitchToEncoding(ctxt, hdlr);
14456 	} else {
14457 	    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14458 			      "Unsupported encoding %s\n", BAD_CAST encoding);
14459         }
14460     } else if (enc != XML_CHAR_ENCODING_NONE) {
14461         xmlSwitchEncoding(ctxt, enc);
14462     }
14463 
14464     return(0);
14465 }
14466 
14467 
14468 /**
14469  * xmlCtxtUseOptionsInternal:
14470  * @ctxt: an XML parser context
14471  * @options:  a combination of xmlParserOption
14472  * @encoding:  the user provided encoding to use
14473  *
14474  * Applies the options to the parser context
14475  *
14476  * Returns 0 in case of success, the set of unknown or unimplemented options
14477  *         in case of error.
14478  */
14479 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14480 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14481 {
14482     if (ctxt == NULL)
14483         return(-1);
14484     if (encoding != NULL) {
14485         if (ctxt->encoding != NULL)
14486 	    xmlFree((xmlChar *) ctxt->encoding);
14487         ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14488     }
14489     if (options & XML_PARSE_RECOVER) {
14490         ctxt->recovery = 1;
14491         options -= XML_PARSE_RECOVER;
14492 	ctxt->options |= XML_PARSE_RECOVER;
14493     } else
14494         ctxt->recovery = 0;
14495     if (options & XML_PARSE_DTDLOAD) {
14496         ctxt->loadsubset = XML_DETECT_IDS;
14497         options -= XML_PARSE_DTDLOAD;
14498 	ctxt->options |= XML_PARSE_DTDLOAD;
14499     } else
14500         ctxt->loadsubset = 0;
14501     if (options & XML_PARSE_DTDATTR) {
14502         ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14503         options -= XML_PARSE_DTDATTR;
14504 	ctxt->options |= XML_PARSE_DTDATTR;
14505     }
14506     if (options & XML_PARSE_NOENT) {
14507         ctxt->replaceEntities = 1;
14508         /* ctxt->loadsubset |= XML_DETECT_IDS; */
14509         options -= XML_PARSE_NOENT;
14510 	ctxt->options |= XML_PARSE_NOENT;
14511     } else
14512         ctxt->replaceEntities = 0;
14513     if (options & XML_PARSE_PEDANTIC) {
14514         ctxt->pedantic = 1;
14515         options -= XML_PARSE_PEDANTIC;
14516 	ctxt->options |= XML_PARSE_PEDANTIC;
14517     } else
14518         ctxt->pedantic = 0;
14519     if (options & XML_PARSE_NOBLANKS) {
14520         ctxt->keepBlanks = 0;
14521         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14522         options -= XML_PARSE_NOBLANKS;
14523 	ctxt->options |= XML_PARSE_NOBLANKS;
14524     } else
14525         ctxt->keepBlanks = 1;
14526     if (options & XML_PARSE_DTDVALID) {
14527         ctxt->validate = 1;
14528         if (options & XML_PARSE_NOWARNING)
14529             ctxt->vctxt.warning = NULL;
14530         if (options & XML_PARSE_NOERROR)
14531             ctxt->vctxt.error = NULL;
14532         options -= XML_PARSE_DTDVALID;
14533 	ctxt->options |= XML_PARSE_DTDVALID;
14534     } else
14535         ctxt->validate = 0;
14536     if (options & XML_PARSE_NOWARNING) {
14537         ctxt->sax->warning = NULL;
14538         options -= XML_PARSE_NOWARNING;
14539     }
14540     if (options & XML_PARSE_NOERROR) {
14541         ctxt->sax->error = NULL;
14542         ctxt->sax->fatalError = NULL;
14543         options -= XML_PARSE_NOERROR;
14544     }
14545 #ifdef LIBXML_SAX1_ENABLED
14546     if (options & XML_PARSE_SAX1) {
14547         ctxt->sax->startElement = xmlSAX2StartElement;
14548         ctxt->sax->endElement = xmlSAX2EndElement;
14549         ctxt->sax->startElementNs = NULL;
14550         ctxt->sax->endElementNs = NULL;
14551         ctxt->sax->initialized = 1;
14552         options -= XML_PARSE_SAX1;
14553 	ctxt->options |= XML_PARSE_SAX1;
14554     }
14555 #endif /* LIBXML_SAX1_ENABLED */
14556     if (options & XML_PARSE_NODICT) {
14557         ctxt->dictNames = 0;
14558         options -= XML_PARSE_NODICT;
14559 	ctxt->options |= XML_PARSE_NODICT;
14560     } else {
14561         ctxt->dictNames = 1;
14562     }
14563     if (options & XML_PARSE_NOCDATA) {
14564         ctxt->sax->cdataBlock = NULL;
14565         options -= XML_PARSE_NOCDATA;
14566 	ctxt->options |= XML_PARSE_NOCDATA;
14567     }
14568     if (options & XML_PARSE_NSCLEAN) {
14569 	ctxt->options |= XML_PARSE_NSCLEAN;
14570         options -= XML_PARSE_NSCLEAN;
14571     }
14572     if (options & XML_PARSE_NONET) {
14573 	ctxt->options |= XML_PARSE_NONET;
14574         options -= XML_PARSE_NONET;
14575     }
14576     if (options & XML_PARSE_COMPACT) {
14577 	ctxt->options |= XML_PARSE_COMPACT;
14578         options -= XML_PARSE_COMPACT;
14579     }
14580     if (options & XML_PARSE_OLD10) {
14581 	ctxt->options |= XML_PARSE_OLD10;
14582         options -= XML_PARSE_OLD10;
14583     }
14584     if (options & XML_PARSE_NOBASEFIX) {
14585 	ctxt->options |= XML_PARSE_NOBASEFIX;
14586         options -= XML_PARSE_NOBASEFIX;
14587     }
14588     if (options & XML_PARSE_HUGE) {
14589 	ctxt->options |= XML_PARSE_HUGE;
14590         options -= XML_PARSE_HUGE;
14591     }
14592     if (options & XML_PARSE_OLDSAX) {
14593 	ctxt->options |= XML_PARSE_OLDSAX;
14594         options -= XML_PARSE_OLDSAX;
14595     }
14596     ctxt->linenumbers = 1;
14597     return (options);
14598 }
14599 
14600 /**
14601  * xmlCtxtUseOptions:
14602  * @ctxt: an XML parser context
14603  * @options:  a combination of xmlParserOption
14604  *
14605  * Applies the options to the parser context
14606  *
14607  * Returns 0 in case of success, the set of unknown or unimplemented options
14608  *         in case of error.
14609  */
14610 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14611 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14612 {
14613    return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14614 }
14615 
14616 /**
14617  * xmlDoRead:
14618  * @ctxt:  an XML parser context
14619  * @URL:  the base URL to use for the document
14620  * @encoding:  the document encoding, or NULL
14621  * @options:  a combination of xmlParserOption
14622  * @reuse:  keep the context for reuse
14623  *
14624  * Common front-end for the xmlRead functions
14625  *
14626  * Returns the resulting document tree or NULL
14627  */
14628 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14629 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14630           int options, int reuse)
14631 {
14632     xmlDocPtr ret;
14633 
14634     xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14635     if (encoding != NULL) {
14636         xmlCharEncodingHandlerPtr hdlr;
14637 
14638 	hdlr = xmlFindCharEncodingHandler(encoding);
14639 	if (hdlr != NULL)
14640 	    xmlSwitchToEncoding(ctxt, hdlr);
14641     }
14642     if ((URL != NULL) && (ctxt->input != NULL) &&
14643         (ctxt->input->filename == NULL))
14644         ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14645     xmlParseDocument(ctxt);
14646     if ((ctxt->wellFormed) || ctxt->recovery)
14647         ret = ctxt->myDoc;
14648     else {
14649         ret = NULL;
14650 	if (ctxt->myDoc != NULL) {
14651 	    xmlFreeDoc(ctxt->myDoc);
14652 	}
14653     }
14654     ctxt->myDoc = NULL;
14655     if (!reuse) {
14656 	xmlFreeParserCtxt(ctxt);
14657     }
14658 
14659     return (ret);
14660 }
14661 
14662 /**
14663  * xmlReadDoc:
14664  * @cur:  a pointer to a zero terminated string
14665  * @URL:  the base URL to use for the document
14666  * @encoding:  the document encoding, or NULL
14667  * @options:  a combination of xmlParserOption
14668  *
14669  * parse an XML in-memory document and build a tree.
14670  *
14671  * Returns the resulting document tree
14672  */
14673 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14674 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14675 {
14676     xmlParserCtxtPtr ctxt;
14677 
14678     if (cur == NULL)
14679         return (NULL);
14680 
14681     ctxt = xmlCreateDocParserCtxt(cur);
14682     if (ctxt == NULL)
14683         return (NULL);
14684     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14685 }
14686 
14687 /**
14688  * xmlReadFile:
14689  * @filename:  a file or URL
14690  * @encoding:  the document encoding, or NULL
14691  * @options:  a combination of xmlParserOption
14692  *
14693  * parse an XML file from the filesystem or the network.
14694  *
14695  * Returns the resulting document tree
14696  */
14697 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14698 xmlReadFile(const char *filename, const char *encoding, int options)
14699 {
14700     xmlParserCtxtPtr ctxt;
14701 
14702     ctxt = xmlCreateURLParserCtxt(filename, options);
14703     if (ctxt == NULL)
14704         return (NULL);
14705     return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14706 }
14707 
14708 /**
14709  * xmlReadMemory:
14710  * @buffer:  a pointer to a char array
14711  * @size:  the size of the array
14712  * @URL:  the base URL to use for the document
14713  * @encoding:  the document encoding, or NULL
14714  * @options:  a combination of xmlParserOption
14715  *
14716  * parse an XML in-memory document and build a tree.
14717  *
14718  * Returns the resulting document tree
14719  */
14720 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14721 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14722 {
14723     xmlParserCtxtPtr ctxt;
14724 
14725     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14726     if (ctxt == NULL)
14727         return (NULL);
14728     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14729 }
14730 
14731 /**
14732  * xmlReadFd:
14733  * @fd:  an open file descriptor
14734  * @URL:  the base URL to use for the document
14735  * @encoding:  the document encoding, or NULL
14736  * @options:  a combination of xmlParserOption
14737  *
14738  * parse an XML from a file descriptor and build a tree.
14739  * NOTE that the file descriptor will not be closed when the
14740  *      reader is closed or reset.
14741  *
14742  * Returns the resulting document tree
14743  */
14744 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14745 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14746 {
14747     xmlParserCtxtPtr ctxt;
14748     xmlParserInputBufferPtr input;
14749     xmlParserInputPtr stream;
14750 
14751     if (fd < 0)
14752         return (NULL);
14753 
14754     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14755     if (input == NULL)
14756         return (NULL);
14757     input->closecallback = NULL;
14758     ctxt = xmlNewParserCtxt();
14759     if (ctxt == NULL) {
14760         xmlFreeParserInputBuffer(input);
14761         return (NULL);
14762     }
14763     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14764     if (stream == NULL) {
14765         xmlFreeParserInputBuffer(input);
14766 	xmlFreeParserCtxt(ctxt);
14767         return (NULL);
14768     }
14769     inputPush(ctxt, stream);
14770     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14771 }
14772 
14773 /**
14774  * xmlReadIO:
14775  * @ioread:  an I/O read function
14776  * @ioclose:  an I/O close function
14777  * @ioctx:  an I/O handler
14778  * @URL:  the base URL to use for the document
14779  * @encoding:  the document encoding, or NULL
14780  * @options:  a combination of xmlParserOption
14781  *
14782  * parse an XML document from I/O functions and source and build a tree.
14783  *
14784  * Returns the resulting document tree
14785  */
14786 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14787 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14788           void *ioctx, const char *URL, const char *encoding, int options)
14789 {
14790     xmlParserCtxtPtr ctxt;
14791     xmlParserInputBufferPtr input;
14792     xmlParserInputPtr stream;
14793 
14794     if (ioread == NULL)
14795         return (NULL);
14796 
14797     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14798                                          XML_CHAR_ENCODING_NONE);
14799     if (input == NULL)
14800         return (NULL);
14801     ctxt = xmlNewParserCtxt();
14802     if (ctxt == NULL) {
14803         xmlFreeParserInputBuffer(input);
14804         return (NULL);
14805     }
14806     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14807     if (stream == NULL) {
14808         xmlFreeParserInputBuffer(input);
14809 	xmlFreeParserCtxt(ctxt);
14810         return (NULL);
14811     }
14812     inputPush(ctxt, stream);
14813     return (xmlDoRead(ctxt, URL, encoding, options, 0));
14814 }
14815 
14816 /**
14817  * xmlCtxtReadDoc:
14818  * @ctxt:  an XML parser context
14819  * @cur:  a pointer to a zero terminated string
14820  * @URL:  the base URL to use for the document
14821  * @encoding:  the document encoding, or NULL
14822  * @options:  a combination of xmlParserOption
14823  *
14824  * parse an XML in-memory document and build a tree.
14825  * This reuses the existing @ctxt parser context
14826  *
14827  * Returns the resulting document tree
14828  */
14829 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14830 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14831                const char *URL, const char *encoding, int options)
14832 {
14833     xmlParserInputPtr stream;
14834 
14835     if (cur == NULL)
14836         return (NULL);
14837     if (ctxt == NULL)
14838         return (NULL);
14839 
14840     xmlCtxtReset(ctxt);
14841 
14842     stream = xmlNewStringInputStream(ctxt, cur);
14843     if (stream == NULL) {
14844         return (NULL);
14845     }
14846     inputPush(ctxt, stream);
14847     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14848 }
14849 
14850 /**
14851  * xmlCtxtReadFile:
14852  * @ctxt:  an XML parser context
14853  * @filename:  a file or URL
14854  * @encoding:  the document encoding, or NULL
14855  * @options:  a combination of xmlParserOption
14856  *
14857  * parse an XML file from the filesystem or the network.
14858  * This reuses the existing @ctxt parser context
14859  *
14860  * Returns the resulting document tree
14861  */
14862 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14863 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14864                 const char *encoding, int options)
14865 {
14866     xmlParserInputPtr stream;
14867 
14868     if (filename == NULL)
14869         return (NULL);
14870     if (ctxt == NULL)
14871         return (NULL);
14872 
14873     xmlCtxtReset(ctxt);
14874 
14875     stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14876     if (stream == NULL) {
14877         return (NULL);
14878     }
14879     inputPush(ctxt, stream);
14880     return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14881 }
14882 
14883 /**
14884  * xmlCtxtReadMemory:
14885  * @ctxt:  an XML parser context
14886  * @buffer:  a pointer to a char array
14887  * @size:  the size of the array
14888  * @URL:  the base URL to use for the document
14889  * @encoding:  the document encoding, or NULL
14890  * @options:  a combination of xmlParserOption
14891  *
14892  * parse an XML in-memory document and build a tree.
14893  * This reuses the existing @ctxt parser context
14894  *
14895  * Returns the resulting document tree
14896  */
14897 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14898 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14899                   const char *URL, const char *encoding, int options)
14900 {
14901     xmlParserInputBufferPtr input;
14902     xmlParserInputPtr stream;
14903 
14904     if (ctxt == NULL)
14905         return (NULL);
14906     if (buffer == NULL)
14907         return (NULL);
14908 
14909     xmlCtxtReset(ctxt);
14910 
14911     input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14912     if (input == NULL) {
14913 	return(NULL);
14914     }
14915 
14916     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14917     if (stream == NULL) {
14918 	xmlFreeParserInputBuffer(input);
14919 	return(NULL);
14920     }
14921 
14922     inputPush(ctxt, stream);
14923     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14924 }
14925 
14926 /**
14927  * xmlCtxtReadFd:
14928  * @ctxt:  an XML parser context
14929  * @fd:  an open file descriptor
14930  * @URL:  the base URL to use for the document
14931  * @encoding:  the document encoding, or NULL
14932  * @options:  a combination of xmlParserOption
14933  *
14934  * parse an XML from a file descriptor and build a tree.
14935  * This reuses the existing @ctxt parser context
14936  * NOTE that the file descriptor will not be closed when the
14937  *      reader is closed or reset.
14938  *
14939  * Returns the resulting document tree
14940  */
14941 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14942 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14943               const char *URL, const char *encoding, int options)
14944 {
14945     xmlParserInputBufferPtr input;
14946     xmlParserInputPtr stream;
14947 
14948     if (fd < 0)
14949         return (NULL);
14950     if (ctxt == NULL)
14951         return (NULL);
14952 
14953     xmlCtxtReset(ctxt);
14954 
14955 
14956     input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14957     if (input == NULL)
14958         return (NULL);
14959     input->closecallback = NULL;
14960     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14961     if (stream == NULL) {
14962         xmlFreeParserInputBuffer(input);
14963         return (NULL);
14964     }
14965     inputPush(ctxt, stream);
14966     return (xmlDoRead(ctxt, URL, encoding, options, 1));
14967 }
14968 
14969 /**
14970  * xmlCtxtReadIO:
14971  * @ctxt:  an XML parser context
14972  * @ioread:  an I/O read function
14973  * @ioclose:  an I/O close function
14974  * @ioctx:  an I/O handler
14975  * @URL:  the base URL to use for the document
14976  * @encoding:  the document encoding, or NULL
14977  * @options:  a combination of xmlParserOption
14978  *
14979  * parse an XML document from I/O functions and source and build a tree.
14980  * This reuses the existing @ctxt parser context
14981  *
14982  * Returns the resulting document tree
14983  */
14984 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14985 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14986               xmlInputCloseCallback ioclose, void *ioctx,
14987 	      const char *URL,
14988               const char *encoding, int options)
14989 {
14990     xmlParserInputBufferPtr input;
14991     xmlParserInputPtr stream;
14992 
14993     if (ioread == NULL)
14994         return (NULL);
14995     if (ctxt == NULL)
14996         return (NULL);
14997 
14998     xmlCtxtReset(ctxt);
14999 
15000     input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15001                                          XML_CHAR_ENCODING_NONE);
15002     if (input == NULL)
15003         return (NULL);
15004     stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15005     if (stream == NULL) {
15006         xmlFreeParserInputBuffer(input);
15007         return (NULL);
15008     }
15009     inputPush(ctxt, stream);
15010     return (xmlDoRead(ctxt, URL, encoding, options, 1));
15011 }
15012 
15013 #define bottom_parser
15014 #include "elfgcchack.h"
15015