1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
/*
 * Record describing one start tag.
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; confirm against the code that fills and reads this struct.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the namespace prefix of the tag */
    const xmlChar *URI;     /* presumably the namespace URI of the tag */
    int line;               /* presumably the input line of the tag */
    int nsNr;               /* presumably a namespace count for the tag */
};
96
/*
 * Forward declarations of file-local (static) routines, so they can be
 * called ahead of their definitions.
 */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
112 /************************************************************************
113 * *
114 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
115 * *
116 ************************************************************************/
117
/* NOTE(review): presumably a length ceiling tied to XML_PARSE_HUGE; confirm at the use sites. */
#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * XML_PARSER_BIG_ENTITY is the size under which xmlParserEntityCheck
 * accepts an entity without looking at the expansion ratio.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
130
131 /*
132 * xmlParserEntityCheck
133 *
134 * Function to check non-linear entity expansion behaviour
135 * This is here to detect and stop exponential linear entity expansion
136 * This is not a limitation of the parser but a safety
137 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138 * parser option.
139 */
140 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)141 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
142 xmlEntityPtr ent, size_t replacement)
143 {
144 size_t consumed = 0;
145 int i;
146
147 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
148 return (0);
149 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
150 return (1);
151
152 /*
153 * This may look absurd but is needed to detect
154 * entities problems
155 */
156 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
157 (ent->content != NULL) && (ent->checked == 0) &&
158 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
159 unsigned long oldnbent = ctxt->nbentities, diff;
160 xmlChar *rep;
161
162 ent->checked = 1;
163
164 ++ctxt->depth;
165 rep = xmlStringDecodeEntities(ctxt, ent->content,
166 XML_SUBSTITUTE_REF, 0, 0, 0);
167 --ctxt->depth;
168 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
169 ent->content[0] = 0;
170 }
171
172 diff = ctxt->nbentities - oldnbent + 1;
173 if (diff > INT_MAX / 2)
174 diff = INT_MAX / 2;
175 ent->checked = diff * 2;
176 if (rep != NULL) {
177 if (xmlStrchr(rep, '<'))
178 ent->checked |= 1;
179 xmlFree(rep);
180 rep = NULL;
181 }
182 }
183
184 /*
185 * Prevent entity exponential check, not just replacement while
186 * parsing the DTD
187 * The check is potentially costly so do that only once in a thousand
188 */
189 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
190 (ctxt->nbentities % 1024 == 0)) {
191 for (i = 0;i < ctxt->inputNr;i++) {
192 consumed += ctxt->inputTab[i]->consumed +
193 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
194 }
195 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 ctxt->instate = XML_PARSER_EOF;
198 return (1);
199 }
200 consumed = 0;
201 }
202
203
204
205 if (replacement != 0) {
206 if (replacement < XML_MAX_TEXT_LENGTH)
207 return(0);
208
209 /*
210 * If the volume of entity copy reaches 10 times the
211 * amount of parsed data and over the large text threshold
212 * then that's very likely to be an abuse.
213 */
214 if (ctxt->input != NULL) {
215 consumed = ctxt->input->consumed +
216 (ctxt->input->cur - ctxt->input->base);
217 }
218 consumed += ctxt->sizeentities;
219
220 if (replacement < XML_PARSER_NON_LINEAR * consumed)
221 return(0);
222 } else if (size != 0) {
223 /*
224 * Do the check based on the replacement size of the entity
225 */
226 if (size < XML_PARSER_BIG_ENTITY)
227 return(0);
228
229 /*
230 * A limit on the amount of text data reasonably used
231 */
232 if (ctxt->input != NULL) {
233 consumed = ctxt->input->consumed +
234 (ctxt->input->cur - ctxt->input->base);
235 }
236 consumed += ctxt->sizeentities;
237
238 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
239 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
240 return (0);
241 } else if (ent != NULL) {
242 /*
243 * use the number of parsed entities in the replacement
244 */
245 size = ent->checked / 2;
246
247 /*
248 * The amount of data parsed counting entities size only once
249 */
250 if (ctxt->input != NULL) {
251 consumed = ctxt->input->consumed +
252 (ctxt->input->cur - ctxt->input->base);
253 }
254 consumed += ctxt->sizeentities;
255
256 /*
257 * Check the density of entities for the amount of data
258 * knowing an entity reference will take at least 3 bytes
259 */
260 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
261 return (0);
262 } else {
263 /*
264 * strange we got no data for checking
265 */
266 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
267 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
268 (ctxt->nbentities <= 10000))
269 return (0);
270 }
271 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
272 return (1);
273 }
274
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option. The default value is 256.
 */
unsigned int xmlParserMaxDepth = 256;
284
285
286
/*
 * NOTE(review): SAX2 and the two buffer sizes are not used in the visible
 * part of the file; the names suggest a SAX2 build switch and initial
 * buffer sizes -- confirm at the use sites.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
/* Marker string, cast to xmlChar * through BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
302
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
312
313
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */

/*
 * Forward declarations of file-local (static) routines.
 */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
339
340 /************************************************************************
341 * *
342 * Some factorized error routines *
343 * *
344 ************************************************************************/
345
346 /**
347 * xmlErrAttributeDup:
348 * @ctxt: an XML parser context
349 * @prefix: the attribute prefix
350 * @localname: the attribute localname
351 *
352 * Handle a redefinition of attribute error
353 */
354 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)355 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
356 const xmlChar * localname)
357 {
358 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
359 (ctxt->instate == XML_PARSER_EOF))
360 return;
361 if (ctxt != NULL)
362 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
363
364 if (prefix == NULL)
365 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
366 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
367 (const char *) localname, NULL, NULL, 0, 0,
368 "Attribute %s redefined\n", localname);
369 else
370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
371 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
372 (const char *) prefix, (const char *) localname,
373 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
374 localname);
375 if (ctxt != NULL) {
376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379 }
380 }
381
382 /**
383 * xmlFatalErr:
384 * @ctxt: an XML parser context
385 * @error: the error number
386 * @extra: extra information string
387 *
388 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
389 */
390 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)391 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
392 {
393 const char *errmsg;
394
395 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396 (ctxt->instate == XML_PARSER_EOF))
397 return;
398 switch (error) {
399 case XML_ERR_INVALID_HEX_CHARREF:
400 errmsg = "CharRef: invalid hexadecimal value";
401 break;
402 case XML_ERR_INVALID_DEC_CHARREF:
403 errmsg = "CharRef: invalid decimal value";
404 break;
405 case XML_ERR_INVALID_CHARREF:
406 errmsg = "CharRef: invalid value";
407 break;
408 case XML_ERR_INTERNAL_ERROR:
409 errmsg = "internal error";
410 break;
411 case XML_ERR_PEREF_AT_EOF:
412 errmsg = "PEReference at end of document";
413 break;
414 case XML_ERR_PEREF_IN_PROLOG:
415 errmsg = "PEReference in prolog";
416 break;
417 case XML_ERR_PEREF_IN_EPILOG:
418 errmsg = "PEReference in epilog";
419 break;
420 case XML_ERR_PEREF_NO_NAME:
421 errmsg = "PEReference: no name";
422 break;
423 case XML_ERR_PEREF_SEMICOL_MISSING:
424 errmsg = "PEReference: expecting ';'";
425 break;
426 case XML_ERR_ENTITY_LOOP:
427 errmsg = "Detected an entity reference loop";
428 break;
429 case XML_ERR_ENTITY_NOT_STARTED:
430 errmsg = "EntityValue: \" or ' expected";
431 break;
432 case XML_ERR_ENTITY_PE_INTERNAL:
433 errmsg = "PEReferences forbidden in internal subset";
434 break;
435 case XML_ERR_ENTITY_NOT_FINISHED:
436 errmsg = "EntityValue: \" or ' expected";
437 break;
438 case XML_ERR_ATTRIBUTE_NOT_STARTED:
439 errmsg = "AttValue: \" or ' expected";
440 break;
441 case XML_ERR_LT_IN_ATTRIBUTE:
442 errmsg = "Unescaped '<' not allowed in attributes values";
443 break;
444 case XML_ERR_LITERAL_NOT_STARTED:
445 errmsg = "SystemLiteral \" or ' expected";
446 break;
447 case XML_ERR_LITERAL_NOT_FINISHED:
448 errmsg = "Unfinished System or Public ID \" or ' expected";
449 break;
450 case XML_ERR_MISPLACED_CDATA_END:
451 errmsg = "Sequence ']]>' not allowed in content";
452 break;
453 case XML_ERR_URI_REQUIRED:
454 errmsg = "SYSTEM or PUBLIC, the URI is missing";
455 break;
456 case XML_ERR_PUBID_REQUIRED:
457 errmsg = "PUBLIC, the Public Identifier is missing";
458 break;
459 case XML_ERR_HYPHEN_IN_COMMENT:
460 errmsg = "Comment must not contain '--' (double-hyphen)";
461 break;
462 case XML_ERR_PI_NOT_STARTED:
463 errmsg = "xmlParsePI : no target name";
464 break;
465 case XML_ERR_RESERVED_XML_NAME:
466 errmsg = "Invalid PI name";
467 break;
468 case XML_ERR_NOTATION_NOT_STARTED:
469 errmsg = "NOTATION: Name expected here";
470 break;
471 case XML_ERR_NOTATION_NOT_FINISHED:
472 errmsg = "'>' required to close NOTATION declaration";
473 break;
474 case XML_ERR_VALUE_REQUIRED:
475 errmsg = "Entity value required";
476 break;
477 case XML_ERR_URI_FRAGMENT:
478 errmsg = "Fragment not allowed";
479 break;
480 case XML_ERR_ATTLIST_NOT_STARTED:
481 errmsg = "'(' required to start ATTLIST enumeration";
482 break;
483 case XML_ERR_NMTOKEN_REQUIRED:
484 errmsg = "NmToken expected in ATTLIST enumeration";
485 break;
486 case XML_ERR_ATTLIST_NOT_FINISHED:
487 errmsg = "')' required to finish ATTLIST enumeration";
488 break;
489 case XML_ERR_MIXED_NOT_STARTED:
490 errmsg = "MixedContentDecl : '|' or ')*' expected";
491 break;
492 case XML_ERR_PCDATA_REQUIRED:
493 errmsg = "MixedContentDecl : '#PCDATA' expected";
494 break;
495 case XML_ERR_ELEMCONTENT_NOT_STARTED:
496 errmsg = "ContentDecl : Name or '(' expected";
497 break;
498 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
499 errmsg = "ContentDecl : ',' '|' or ')' expected";
500 break;
501 case XML_ERR_PEREF_IN_INT_SUBSET:
502 errmsg =
503 "PEReference: forbidden within markup decl in internal subset";
504 break;
505 case XML_ERR_GT_REQUIRED:
506 errmsg = "expected '>'";
507 break;
508 case XML_ERR_CONDSEC_INVALID:
509 errmsg = "XML conditional section '[' expected";
510 break;
511 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
512 errmsg = "Content error in the external subset";
513 break;
514 case XML_ERR_CONDSEC_INVALID_KEYWORD:
515 errmsg =
516 "conditional section INCLUDE or IGNORE keyword expected";
517 break;
518 case XML_ERR_CONDSEC_NOT_FINISHED:
519 errmsg = "XML conditional section not closed";
520 break;
521 case XML_ERR_XMLDECL_NOT_STARTED:
522 errmsg = "Text declaration '<?xml' required";
523 break;
524 case XML_ERR_XMLDECL_NOT_FINISHED:
525 errmsg = "parsing XML declaration: '?>' expected";
526 break;
527 case XML_ERR_EXT_ENTITY_STANDALONE:
528 errmsg = "external parsed entities cannot be standalone";
529 break;
530 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
531 errmsg = "EntityRef: expecting ';'";
532 break;
533 case XML_ERR_DOCTYPE_NOT_FINISHED:
534 errmsg = "DOCTYPE improperly terminated";
535 break;
536 case XML_ERR_LTSLASH_REQUIRED:
537 errmsg = "EndTag: '</' not found";
538 break;
539 case XML_ERR_EQUAL_REQUIRED:
540 errmsg = "expected '='";
541 break;
542 case XML_ERR_STRING_NOT_CLOSED:
543 errmsg = "String not closed expecting \" or '";
544 break;
545 case XML_ERR_STRING_NOT_STARTED:
546 errmsg = "String not started expecting ' or \"";
547 break;
548 case XML_ERR_ENCODING_NAME:
549 errmsg = "Invalid XML encoding name";
550 break;
551 case XML_ERR_STANDALONE_VALUE:
552 errmsg = "standalone accepts only 'yes' or 'no'";
553 break;
554 case XML_ERR_DOCUMENT_EMPTY:
555 errmsg = "Document is empty";
556 break;
557 case XML_ERR_DOCUMENT_END:
558 errmsg = "Extra content at the end of the document";
559 break;
560 case XML_ERR_NOT_WELL_BALANCED:
561 errmsg = "chunk is not well balanced";
562 break;
563 case XML_ERR_EXTRA_CONTENT:
564 errmsg = "extra content at the end of well balanced chunk";
565 break;
566 case XML_ERR_VERSION_MISSING:
567 errmsg = "Malformed declaration expecting version";
568 break;
569 case XML_ERR_NAME_TOO_LONG:
570 errmsg = "Name too long";
571 break;
572 #if 0
573 case:
574 errmsg = "";
575 break;
576 #endif
577 default:
578 errmsg = "Unregistered error message";
579 }
580 if (ctxt != NULL)
581 ctxt->errNo = error;
582 if (info == NULL) {
583 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
585 errmsg);
586 } else {
587 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
588 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
589 errmsg, info);
590 }
591 if (ctxt != NULL) {
592 ctxt->wellFormed = 0;
593 if (ctxt->recovery == 0)
594 ctxt->disableSAX = 1;
595 }
596 }
597
598 /**
599 * xmlFatalErrMsg:
600 * @ctxt: an XML parser context
601 * @error: the error number
602 * @msg: the error message
603 *
604 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
605 */
606 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)607 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
608 const char *msg)
609 {
610 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
611 (ctxt->instate == XML_PARSER_EOF))
612 return;
613 if (ctxt != NULL)
614 ctxt->errNo = error;
615 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
617 if (ctxt != NULL) {
618 ctxt->wellFormed = 0;
619 if (ctxt->recovery == 0)
620 ctxt->disableSAX = 1;
621 }
622 }
623
624 /**
625 * xmlWarningMsg:
626 * @ctxt: an XML parser context
627 * @error: the error number
628 * @msg: the error message
629 * @str1: extra data
630 * @str2: extra data
631 *
632 * Handle a warning.
633 */
634 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)635 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636 const char *msg, const xmlChar *str1, const xmlChar *str2)
637 {
638 xmlStructuredErrorFunc schannel = NULL;
639
640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
643 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
644 (ctxt->sax->initialized == XML_SAX2_MAGIC))
645 schannel = ctxt->sax->serror;
646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
648 (ctxt->sax) ? ctxt->sax->warning : NULL,
649 ctxt->userData,
650 ctxt, NULL, XML_FROM_PARSER, error,
651 XML_ERR_WARNING, NULL, 0,
652 (const char *) str1, (const char *) str2, NULL, 0, 0,
653 msg, (const char *) str1, (const char *) str2);
654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_PARSER, error,
657 XML_ERR_WARNING, NULL, 0,
658 (const char *) str1, (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
660 }
661 }
662
663 /**
664 * xmlValidityError:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @str1: extra data
669 *
670 * Handle a validity error.
671 */
672 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)673 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
674 const char *msg, const xmlChar *str1, const xmlChar *str2)
675 {
676 xmlStructuredErrorFunc schannel = NULL;
677
678 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679 (ctxt->instate == XML_PARSER_EOF))
680 return;
681 if (ctxt != NULL) {
682 ctxt->errNo = error;
683 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
684 schannel = ctxt->sax->serror;
685 }
686 if (ctxt != NULL) {
687 __xmlRaiseError(schannel,
688 ctxt->vctxt.error, ctxt->vctxt.userData,
689 ctxt, NULL, XML_FROM_DTD, error,
690 XML_ERR_ERROR, NULL, 0, (const char *) str1,
691 (const char *) str2, NULL, 0, 0,
692 msg, (const char *) str1, (const char *) str2);
693 ctxt->valid = 0;
694 } else {
695 __xmlRaiseError(schannel, NULL, NULL,
696 ctxt, NULL, XML_FROM_DTD, error,
697 XML_ERR_ERROR, NULL, 0, (const char *) str1,
698 (const char *) str2, NULL, 0, 0,
699 msg, (const char *) str1, (const char *) str2);
700 }
701 }
702
703 /**
704 * xmlFatalErrMsgInt:
705 * @ctxt: an XML parser context
706 * @error: the error number
707 * @msg: the error message
708 * @val: an integer value
709 *
710 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711 */
712 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)713 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714 const char *msg, int val)
715 {
716 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
717 (ctxt->instate == XML_PARSER_EOF))
718 return;
719 if (ctxt != NULL)
720 ctxt->errNo = error;
721 __xmlRaiseError(NULL, NULL, NULL,
722 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
723 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
724 if (ctxt != NULL) {
725 ctxt->wellFormed = 0;
726 if (ctxt->recovery == 0)
727 ctxt->disableSAX = 1;
728 }
729 }
730
731 /**
732 * xmlFatalErrMsgStrIntStr:
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the error message
736 * @str1: an string info
737 * @val: an integer value
738 * @str2: an string info
739 *
740 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
741 */
742 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)743 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
744 const char *msg, const xmlChar *str1, int val,
745 const xmlChar *str2)
746 {
747 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748 (ctxt->instate == XML_PARSER_EOF))
749 return;
750 if (ctxt != NULL)
751 ctxt->errNo = error;
752 __xmlRaiseError(NULL, NULL, NULL,
753 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
754 NULL, 0, (const char *) str1, (const char *) str2,
755 NULL, val, 0, msg, str1, val, str2);
756 if (ctxt != NULL) {
757 ctxt->wellFormed = 0;
758 if (ctxt->recovery == 0)
759 ctxt->disableSAX = 1;
760 }
761 }
762
763 /**
764 * xmlFatalErrMsgStr:
765 * @ctxt: an XML parser context
766 * @error: the error number
767 * @msg: the error message
768 * @val: a string value
769 *
770 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
771 */
772 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)773 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
774 const char *msg, const xmlChar * val)
775 {
776 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
777 (ctxt->instate == XML_PARSER_EOF))
778 return;
779 if (ctxt != NULL)
780 ctxt->errNo = error;
781 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
782 XML_FROM_PARSER, error, XML_ERR_FATAL,
783 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
784 val);
785 if (ctxt != NULL) {
786 ctxt->wellFormed = 0;
787 if (ctxt->recovery == 0)
788 ctxt->disableSAX = 1;
789 }
790 }
791
792 /**
793 * xmlErrMsgStr:
794 * @ctxt: an XML parser context
795 * @error: the error number
796 * @msg: the error message
797 * @val: a string value
798 *
799 * Handle a non fatal parser error
800 */
801 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)802 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
803 const char *msg, const xmlChar * val)
804 {
805 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
806 (ctxt->instate == XML_PARSER_EOF))
807 return;
808 if (ctxt != NULL)
809 ctxt->errNo = error;
810 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
811 XML_FROM_PARSER, error, XML_ERR_ERROR,
812 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
813 val);
814 }
815
816 /**
817 * xmlNsErr:
818 * @ctxt: an XML parser context
819 * @error: the error number
820 * @msg: the message
821 * @info1: extra information string
822 * @info2: extra information string
823 *
824 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
825 */
826 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)827 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
828 const char *msg,
829 const xmlChar * info1, const xmlChar * info2,
830 const xmlChar * info3)
831 {
832 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
833 (ctxt->instate == XML_PARSER_EOF))
834 return;
835 if (ctxt != NULL)
836 ctxt->errNo = error;
837 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
838 XML_ERR_ERROR, NULL, 0, (const char *) info1,
839 (const char *) info2, (const char *) info3, 0, 0, msg,
840 info1, info2, info3);
841 if (ctxt != NULL)
842 ctxt->nsWellFormed = 0;
843 }
844
845 /**
846 * xmlNsWarn
847 * @ctxt: an XML parser context
848 * @error: the error number
849 * @msg: the message
850 * @info1: extra information string
851 * @info2: extra information string
852 *
853 * Handle a namespace warning error
854 */
855 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)856 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
857 const char *msg,
858 const xmlChar * info1, const xmlChar * info2,
859 const xmlChar * info3)
860 {
861 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
862 (ctxt->instate == XML_PARSER_EOF))
863 return;
864 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
865 XML_ERR_WARNING, NULL, 0, (const char *) info1,
866 (const char *) info2, (const char *) info3, 0, 0, msg,
867 info1, info2, info3);
868 }
869
870 /************************************************************************
871 * *
872 * Library wide options *
873 * *
874 ************************************************************************/
875
876 /**
877 * xmlHasFeature:
878 * @feature: the feature to be examined
879 *
880 * Examines if the library has been compiled with a given feature.
881 *
882 * Returns a non-zero value if the feature exist, otherwise zero.
883 * Returns zero (0) if the feature does not exist or an unknown
884 * unknown feature is requested, non-zero otherwise.
885 */
886 int
xmlHasFeature(xmlFeature feature)887 xmlHasFeature(xmlFeature feature)
888 {
889 switch (feature) {
890 case XML_WITH_THREAD:
891 #ifdef LIBXML_THREAD_ENABLED
892 return(1);
893 #else
894 return(0);
895 #endif
896 case XML_WITH_TREE:
897 #ifdef LIBXML_TREE_ENABLED
898 return(1);
899 #else
900 return(0);
901 #endif
902 case XML_WITH_OUTPUT:
903 #ifdef LIBXML_OUTPUT_ENABLED
904 return(1);
905 #else
906 return(0);
907 #endif
908 case XML_WITH_PUSH:
909 #ifdef LIBXML_PUSH_ENABLED
910 return(1);
911 #else
912 return(0);
913 #endif
914 case XML_WITH_READER:
915 #ifdef LIBXML_READER_ENABLED
916 return(1);
917 #else
918 return(0);
919 #endif
920 case XML_WITH_PATTERN:
921 #ifdef LIBXML_PATTERN_ENABLED
922 return(1);
923 #else
924 return(0);
925 #endif
926 case XML_WITH_WRITER:
927 #ifdef LIBXML_WRITER_ENABLED
928 return(1);
929 #else
930 return(0);
931 #endif
932 case XML_WITH_SAX1:
933 #ifdef LIBXML_SAX1_ENABLED
934 return(1);
935 #else
936 return(0);
937 #endif
938 case XML_WITH_FTP:
939 #ifdef LIBXML_FTP_ENABLED
940 return(1);
941 #else
942 return(0);
943 #endif
944 case XML_WITH_HTTP:
945 #ifdef LIBXML_HTTP_ENABLED
946 return(1);
947 #else
948 return(0);
949 #endif
950 case XML_WITH_VALID:
951 #ifdef LIBXML_VALID_ENABLED
952 return(1);
953 #else
954 return(0);
955 #endif
956 case XML_WITH_HTML:
957 #ifdef LIBXML_HTML_ENABLED
958 return(1);
959 #else
960 return(0);
961 #endif
962 case XML_WITH_LEGACY:
963 #ifdef LIBXML_LEGACY_ENABLED
964 return(1);
965 #else
966 return(0);
967 #endif
968 case XML_WITH_C14N:
969 #ifdef LIBXML_C14N_ENABLED
970 return(1);
971 #else
972 return(0);
973 #endif
974 case XML_WITH_CATALOG:
975 #ifdef LIBXML_CATALOG_ENABLED
976 return(1);
977 #else
978 return(0);
979 #endif
980 case XML_WITH_XPATH:
981 #ifdef LIBXML_XPATH_ENABLED
982 return(1);
983 #else
984 return(0);
985 #endif
986 case XML_WITH_XPTR:
987 #ifdef LIBXML_XPTR_ENABLED
988 return(1);
989 #else
990 return(0);
991 #endif
992 case XML_WITH_XINCLUDE:
993 #ifdef LIBXML_XINCLUDE_ENABLED
994 return(1);
995 #else
996 return(0);
997 #endif
998 case XML_WITH_ICONV:
999 #ifdef LIBXML_ICONV_ENABLED
1000 return(1);
1001 #else
1002 return(0);
1003 #endif
1004 case XML_WITH_ISO8859X:
1005 #ifdef LIBXML_ISO8859X_ENABLED
1006 return(1);
1007 #else
1008 return(0);
1009 #endif
1010 case XML_WITH_UNICODE:
1011 #ifdef LIBXML_UNICODE_ENABLED
1012 return(1);
1013 #else
1014 return(0);
1015 #endif
1016 case XML_WITH_REGEXP:
1017 #ifdef LIBXML_REGEXP_ENABLED
1018 return(1);
1019 #else
1020 return(0);
1021 #endif
1022 case XML_WITH_AUTOMATA:
1023 #ifdef LIBXML_AUTOMATA_ENABLED
1024 return(1);
1025 #else
1026 return(0);
1027 #endif
1028 case XML_WITH_EXPR:
1029 #ifdef LIBXML_EXPR_ENABLED
1030 return(1);
1031 #else
1032 return(0);
1033 #endif
1034 case XML_WITH_SCHEMAS:
1035 #ifdef LIBXML_SCHEMAS_ENABLED
1036 return(1);
1037 #else
1038 return(0);
1039 #endif
1040 case XML_WITH_SCHEMATRON:
1041 #ifdef LIBXML_SCHEMATRON_ENABLED
1042 return(1);
1043 #else
1044 return(0);
1045 #endif
1046 case XML_WITH_MODULES:
1047 #ifdef LIBXML_MODULES_ENABLED
1048 return(1);
1049 #else
1050 return(0);
1051 #endif
1052 case XML_WITH_DEBUG:
1053 #ifdef LIBXML_DEBUG_ENABLED
1054 return(1);
1055 #else
1056 return(0);
1057 #endif
1058 case XML_WITH_DEBUG_MEM:
1059 #ifdef DEBUG_MEMORY_LOCATION
1060 return(1);
1061 #else
1062 return(0);
1063 #endif
1064 case XML_WITH_DEBUG_RUN:
1065 #ifdef LIBXML_DEBUG_RUNTIME
1066 return(1);
1067 #else
1068 return(0);
1069 #endif
1070 case XML_WITH_ZLIB:
1071 #ifdef LIBXML_ZLIB_ENABLED
1072 return(1);
1073 #else
1074 return(0);
1075 #endif
1076 case XML_WITH_LZMA:
1077 #ifdef LIBXML_LZMA_ENABLED
1078 return(1);
1079 #else
1080 return(0);
1081 #endif
1082 case XML_WITH_ICU:
1083 #ifdef LIBXML_ICU_ENABLED
1084 return(1);
1085 #else
1086 return(0);
1087 #endif
1088 default:
1089 break;
1090 }
1091 return(0);
1092 }
1093
1094 /************************************************************************
1095 * *
1096 * SAX2 defaulted attributes handling *
1097 * *
1098 ************************************************************************/
1099
1100 /**
1101 * xmlDetectSAX2:
1102 * @ctxt: an XML parser context
1103 *
1104 * Do the SAX2 detection and specific initialization
1105 */
1106 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1107 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1108 xmlSAXHandlerPtr sax;
1109
1110 /* Avoid unused variable warning if features are disabled. */
1111 (void) sax;
1112
1113 if (ctxt == NULL) return;
1114 sax = ctxt->sax;
1115 #ifdef LIBXML_SAX1_ENABLED
1116 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1117 ((sax->startElementNs != NULL) ||
1118 (sax->endElementNs != NULL) ||
1119 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1120 ctxt->sax2 = 1;
1121 #else
1122 ctxt->sax2 = 1;
1123 #endif /* LIBXML_SAX1_ENABLED */
1124
1125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1128 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1129 (ctxt->str_xml_ns == NULL)) {
1130 xmlErrMemory(ctxt, NULL);
1131 }
1132 }
1133
1134 typedef struct _xmlDefAttrs xmlDefAttrs;
1135 typedef xmlDefAttrs *xmlDefAttrsPtr;
1136 struct _xmlDefAttrs {
1137 int nbAttrs; /* number of defaulted attributes on that element */
1138 int maxAttrs; /* the size of the array */
1139 #if __STDC_VERSION__ >= 199901L
1140 /* Using a C99 flexible array member avoids UBSan errors. */
1141 const xmlChar *values[]; /* array of localname/prefix/values/external */
1142 #else
1143 const xmlChar *values[5];
1144 #endif
1145 };
1146
1147 /**
1148 * xmlAttrNormalizeSpace:
1149 * @src: the source string
1150 * @dst: the target string
1151 *
1152 * Normalize the space in non CDATA attribute values:
1153 * If the attribute type is not CDATA, then the XML processor MUST further
1154 * process the normalized attribute value by discarding any leading and
1155 * trailing space (#x20) characters, and by replacing sequences of space
1156 * (#x20) characters by a single space (#x20) character.
1157 * Note that the size of dst need to be at least src, and if one doesn't need
1158 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1159 * passing src as dst is just fine.
1160 *
1161 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1162 * is needed.
1163 */
1164 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1165 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1166 {
1167 if ((src == NULL) || (dst == NULL))
1168 return(NULL);
1169
1170 while (*src == 0x20) src++;
1171 while (*src != 0) {
1172 if (*src == 0x20) {
1173 while (*src == 0x20) src++;
1174 if (*src != 0)
1175 *dst++ = 0x20;
1176 } else {
1177 *dst++ = *src++;
1178 }
1179 }
1180 *dst = 0;
1181 if (dst == src)
1182 return(NULL);
1183 return(dst);
1184 }
1185
1186 /**
1187 * xmlAttrNormalizeSpace2:
1188 * @src: the source string
1189 *
1190 * Normalize the space in non CDATA attribute values, a slightly more complex
1191 * front end to avoid allocation problems when running on attribute values
1192 * coming from the input.
1193 *
1194 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1195 * is needed.
1196 */
1197 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1198 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1199 {
1200 int i;
1201 int remove_head = 0;
1202 int need_realloc = 0;
1203 const xmlChar *cur;
1204
1205 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1206 return(NULL);
1207 i = *len;
1208 if (i <= 0)
1209 return(NULL);
1210
1211 cur = src;
1212 while (*cur == 0x20) {
1213 cur++;
1214 remove_head++;
1215 }
1216 while (*cur != 0) {
1217 if (*cur == 0x20) {
1218 cur++;
1219 if ((*cur == 0x20) || (*cur == 0)) {
1220 need_realloc = 1;
1221 break;
1222 }
1223 } else
1224 cur++;
1225 }
1226 if (need_realloc) {
1227 xmlChar *ret;
1228
1229 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1230 if (ret == NULL) {
1231 xmlErrMemory(ctxt, NULL);
1232 return(NULL);
1233 }
1234 xmlAttrNormalizeSpace(ret, ret);
1235 *len = (int) strlen((const char *)ret);
1236 return(ret);
1237 } else if (remove_head) {
1238 *len -= remove_head;
1239 memmove(src, src + remove_head, 1 + *len);
1240 return(src);
1241 }
1242 return(NULL);
1243 }
1244
1245 /**
1246 * xmlAddDefAttrs:
1247 * @ctxt: an XML parser context
1248 * @fullname: the element fullname
1249 * @fullattr: the attribute fullname
1250 * @value: the attribute value
1251 *
1252 * Add a defaulted attribute for an element
1253 */
1254 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1255 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1256 const xmlChar *fullname,
1257 const xmlChar *fullattr,
1258 const xmlChar *value) {
1259 xmlDefAttrsPtr defaults;
1260 int len;
1261 const xmlChar *name;
1262 const xmlChar *prefix;
1263
1264 /*
1265 * Allows to detect attribute redefinitions
1266 */
1267 if (ctxt->attsSpecial != NULL) {
1268 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1269 return;
1270 }
1271
1272 if (ctxt->attsDefault == NULL) {
1273 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1274 if (ctxt->attsDefault == NULL)
1275 goto mem_error;
1276 }
1277
1278 /*
1279 * split the element name into prefix:localname , the string found
1280 * are within the DTD and then not associated to namespace names.
1281 */
1282 name = xmlSplitQName3(fullname, &len);
1283 if (name == NULL) {
1284 name = xmlDictLookup(ctxt->dict, fullname, -1);
1285 prefix = NULL;
1286 } else {
1287 name = xmlDictLookup(ctxt->dict, name, -1);
1288 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1289 }
1290
1291 /*
1292 * make sure there is some storage
1293 */
1294 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1295 if (defaults == NULL) {
1296 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1297 (4 * 5) * sizeof(const xmlChar *));
1298 if (defaults == NULL)
1299 goto mem_error;
1300 defaults->nbAttrs = 0;
1301 defaults->maxAttrs = 4;
1302 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1303 defaults, NULL) < 0) {
1304 xmlFree(defaults);
1305 goto mem_error;
1306 }
1307 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1308 xmlDefAttrsPtr temp;
1309
1310 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1311 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1312 if (temp == NULL)
1313 goto mem_error;
1314 defaults = temp;
1315 defaults->maxAttrs *= 2;
1316 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1317 defaults, NULL) < 0) {
1318 xmlFree(defaults);
1319 goto mem_error;
1320 }
1321 }
1322
1323 /*
1324 * Split the element name into prefix:localname , the string found
1325 * are within the DTD and hen not associated to namespace names.
1326 */
1327 name = xmlSplitQName3(fullattr, &len);
1328 if (name == NULL) {
1329 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1330 prefix = NULL;
1331 } else {
1332 name = xmlDictLookup(ctxt->dict, name, -1);
1333 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1334 }
1335
1336 defaults->values[5 * defaults->nbAttrs] = name;
1337 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1338 /* intern the string and precompute the end */
1339 len = xmlStrlen(value);
1340 value = xmlDictLookup(ctxt->dict, value, len);
1341 defaults->values[5 * defaults->nbAttrs + 2] = value;
1342 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1343 if (ctxt->external)
1344 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1345 else
1346 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1347 defaults->nbAttrs++;
1348
1349 return;
1350
1351 mem_error:
1352 xmlErrMemory(ctxt, NULL);
1353 return;
1354 }
1355
1356 /**
1357 * xmlAddSpecialAttr:
1358 * @ctxt: an XML parser context
1359 * @fullname: the element fullname
1360 * @fullattr: the attribute fullname
1361 * @type: the attribute type
1362 *
1363 * Register this attribute type
1364 */
1365 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1366 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1367 const xmlChar *fullname,
1368 const xmlChar *fullattr,
1369 int type)
1370 {
1371 if (ctxt->attsSpecial == NULL) {
1372 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1373 if (ctxt->attsSpecial == NULL)
1374 goto mem_error;
1375 }
1376
1377 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1378 return;
1379
1380 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1381 (void *) (ptrdiff_t) type);
1382 return;
1383
1384 mem_error:
1385 xmlErrMemory(ctxt, NULL);
1386 return;
1387 }
1388
1389 /**
1390 * xmlCleanSpecialAttrCallback:
1391 *
1392 * Removes CDATA attributes from the special attribute table
1393 */
1394 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1395 xmlCleanSpecialAttrCallback(void *payload, void *data,
1396 const xmlChar *fullname, const xmlChar *fullattr,
1397 const xmlChar *unused ATTRIBUTE_UNUSED) {
1398 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1399
1400 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1401 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1402 }
1403 }
1404
1405 /**
1406 * xmlCleanSpecialAttr:
1407 * @ctxt: an XML parser context
1408 *
1409 * Trim the list of attributes defined to remove all those of type
1410 * CDATA as they are not special. This call should be done when finishing
1411 * to parse the DTD and before starting to parse the document root.
1412 */
1413 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1414 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1415 {
1416 if (ctxt->attsSpecial == NULL)
1417 return;
1418
1419 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1420
1421 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1422 xmlHashFree(ctxt->attsSpecial, NULL);
1423 ctxt->attsSpecial = NULL;
1424 }
1425 return;
1426 }
1427
1428 /**
1429 * xmlCheckLanguageID:
1430 * @lang: pointer to the string value
1431 *
1432 * Checks that the value conforms to the LanguageID production:
1433 *
1434 * NOTE: this is somewhat deprecated, those productions were removed from
1435 * the XML Second edition.
1436 *
1437 * [33] LanguageID ::= Langcode ('-' Subcode)*
1438 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1439 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1440 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1441 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1442 * [38] Subcode ::= ([a-z] | [A-Z])+
1443 *
1444 * The current REC reference the successors of RFC 1766, currently 5646
1445 *
1446 * http://www.rfc-editor.org/rfc/rfc5646.txt
1447 * langtag = language
1448 * ["-" script]
1449 * ["-" region]
1450 * *("-" variant)
1451 * *("-" extension)
1452 * ["-" privateuse]
1453 * language = 2*3ALPHA ; shortest ISO 639 code
1454 * ["-" extlang] ; sometimes followed by
1455 * ; extended language subtags
1456 * / 4ALPHA ; or reserved for future use
1457 * / 5*8ALPHA ; or registered language subtag
1458 *
1459 * extlang = 3ALPHA ; selected ISO 639 codes
1460 * *2("-" 3ALPHA) ; permanently reserved
1461 *
1462 * script = 4ALPHA ; ISO 15924 code
1463 *
1464 * region = 2ALPHA ; ISO 3166-1 code
1465 * / 3DIGIT ; UN M.49 code
1466 *
1467 * variant = 5*8alphanum ; registered variants
1468 * / (DIGIT 3alphanum)
1469 *
1470 * extension = singleton 1*("-" (2*8alphanum))
1471 *
1472 * ; Single alphanumerics
1473 * ; "x" reserved for private use
1474 * singleton = DIGIT ; 0 - 9
1475 * / %x41-57 ; A - W
1476 * / %x59-5A ; Y - Z
1477 * / %x61-77 ; a - w
1478 * / %x79-7A ; y - z
1479 *
1480 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1481 * The parser below doesn't try to cope with extension or privateuse
1482 * that could be added but that's not interoperable anyway
1483 *
1484 * Returns 1 if correct 0 otherwise
1485 **/
1486 int
xmlCheckLanguageID(const xmlChar * lang)1487 xmlCheckLanguageID(const xmlChar * lang)
1488 {
1489 const xmlChar *cur = lang, *nxt;
1490
1491 if (cur == NULL)
1492 return (0);
1493 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1494 ((cur[0] == 'I') && (cur[1] == '-')) ||
1495 ((cur[0] == 'x') && (cur[1] == '-')) ||
1496 ((cur[0] == 'X') && (cur[1] == '-'))) {
1497 /*
1498 * Still allow IANA code and user code which were coming
1499 * from the previous version of the XML-1.0 specification
1500 * it's deprecated but we should not fail
1501 */
1502 cur += 2;
1503 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1504 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1505 cur++;
1506 return(cur[0] == 0);
1507 }
1508 nxt = cur;
1509 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1510 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1511 nxt++;
1512 if (nxt - cur >= 4) {
1513 /*
1514 * Reserved
1515 */
1516 if ((nxt - cur > 8) || (nxt[0] != 0))
1517 return(0);
1518 return(1);
1519 }
1520 if (nxt - cur < 2)
1521 return(0);
1522 /* we got an ISO 639 code */
1523 if (nxt[0] == 0)
1524 return(1);
1525 if (nxt[0] != '-')
1526 return(0);
1527
1528 nxt++;
1529 cur = nxt;
1530 /* now we can have extlang or script or region or variant */
1531 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1532 goto region_m49;
1533
1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536 nxt++;
1537 if (nxt - cur == 4)
1538 goto script;
1539 if (nxt - cur == 2)
1540 goto region;
1541 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1542 goto variant;
1543 if (nxt - cur != 3)
1544 return(0);
1545 /* we parsed an extlang */
1546 if (nxt[0] == 0)
1547 return(1);
1548 if (nxt[0] != '-')
1549 return(0);
1550
1551 nxt++;
1552 cur = nxt;
1553 /* now we can have script or region or variant */
1554 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1555 goto region_m49;
1556
1557 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1558 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1559 nxt++;
1560 if (nxt - cur == 2)
1561 goto region;
1562 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1563 goto variant;
1564 if (nxt - cur != 4)
1565 return(0);
1566 /* we parsed a script */
1567 script:
1568 if (nxt[0] == 0)
1569 return(1);
1570 if (nxt[0] != '-')
1571 return(0);
1572
1573 nxt++;
1574 cur = nxt;
1575 /* now we can have region or variant */
1576 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1577 goto region_m49;
1578
1579 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1580 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1581 nxt++;
1582
1583 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1584 goto variant;
1585 if (nxt - cur != 2)
1586 return(0);
1587 /* we parsed a region */
1588 region:
1589 if (nxt[0] == 0)
1590 return(1);
1591 if (nxt[0] != '-')
1592 return(0);
1593
1594 nxt++;
1595 cur = nxt;
1596 /* now we can just have a variant */
1597 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1598 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1599 nxt++;
1600
1601 if ((nxt - cur < 5) || (nxt - cur > 8))
1602 return(0);
1603
1604 /* we parsed a variant */
1605 variant:
1606 if (nxt[0] == 0)
1607 return(1);
1608 if (nxt[0] != '-')
1609 return(0);
1610 /* extensions and private use subtags not checked */
1611 return (1);
1612
1613 region_m49:
1614 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1615 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1616 nxt += 3;
1617 goto region;
1618 }
1619 return(0);
1620 }
1621
1622 /************************************************************************
1623 * *
1624 * Parser stacks related functions and macros *
1625 * *
1626 ************************************************************************/
1627
1628 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1629 const xmlChar ** str);
1630
1631 #ifdef SAX2
1632 /**
1633 * nsPush:
1634 * @ctxt: an XML parser context
1635 * @prefix: the namespace prefix or NULL
1636 * @URL: the namespace name
1637 *
1638 * Pushes a new parser namespace on top of the ns stack
1639 *
1640 * Returns -1 in case of error, -2 if the namespace should be discarded
1641 * and the index in the stack otherwise.
1642 */
1643 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1644 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1645 {
1646 if (ctxt->options & XML_PARSE_NSCLEAN) {
1647 int i;
1648 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1649 if (ctxt->nsTab[i] == prefix) {
1650 /* in scope */
1651 if (ctxt->nsTab[i + 1] == URL)
1652 return(-2);
1653 /* out of scope keep it */
1654 break;
1655 }
1656 }
1657 }
1658 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1659 ctxt->nsMax = 10;
1660 ctxt->nsNr = 0;
1661 ctxt->nsTab = (const xmlChar **)
1662 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1663 if (ctxt->nsTab == NULL) {
1664 xmlErrMemory(ctxt, NULL);
1665 ctxt->nsMax = 0;
1666 return (-1);
1667 }
1668 } else if (ctxt->nsNr >= ctxt->nsMax) {
1669 const xmlChar ** tmp;
1670 ctxt->nsMax *= 2;
1671 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1672 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1673 if (tmp == NULL) {
1674 xmlErrMemory(ctxt, NULL);
1675 ctxt->nsMax /= 2;
1676 return (-1);
1677 }
1678 ctxt->nsTab = tmp;
1679 }
1680 ctxt->nsTab[ctxt->nsNr++] = prefix;
1681 ctxt->nsTab[ctxt->nsNr++] = URL;
1682 return (ctxt->nsNr);
1683 }
1684 /**
1685 * nsPop:
1686 * @ctxt: an XML parser context
1687 * @nr: the number to pop
1688 *
1689 * Pops the top @nr parser prefix/namespace from the ns stack
1690 *
1691 * Returns the number of namespaces removed
1692 */
1693 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1694 nsPop(xmlParserCtxtPtr ctxt, int nr)
1695 {
1696 int i;
1697
1698 if (ctxt->nsTab == NULL) return(0);
1699 if (ctxt->nsNr < nr) {
1700 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1701 nr = ctxt->nsNr;
1702 }
1703 if (ctxt->nsNr <= 0)
1704 return (0);
1705
1706 for (i = 0;i < nr;i++) {
1707 ctxt->nsNr--;
1708 ctxt->nsTab[ctxt->nsNr] = NULL;
1709 }
1710 return(nr);
1711 }
1712 #endif
1713
1714 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1715 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1716 const xmlChar **atts;
1717 int *attallocs;
1718 int maxatts;
1719
1720 if (ctxt->atts == NULL) {
1721 maxatts = 55; /* allow for 10 attrs by default */
1722 atts = (const xmlChar **)
1723 xmlMalloc(maxatts * sizeof(xmlChar *));
1724 if (atts == NULL) goto mem_error;
1725 ctxt->atts = atts;
1726 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1727 if (attallocs == NULL) goto mem_error;
1728 ctxt->attallocs = attallocs;
1729 ctxt->maxatts = maxatts;
1730 } else if (nr + 5 > ctxt->maxatts) {
1731 maxatts = (nr + 5) * 2;
1732 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1733 maxatts * sizeof(const xmlChar *));
1734 if (atts == NULL) goto mem_error;
1735 ctxt->atts = atts;
1736 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1737 (maxatts / 5) * sizeof(int));
1738 if (attallocs == NULL) goto mem_error;
1739 ctxt->attallocs = attallocs;
1740 ctxt->maxatts = maxatts;
1741 }
1742 return(ctxt->maxatts);
1743 mem_error:
1744 xmlErrMemory(ctxt, NULL);
1745 return(-1);
1746 }
1747
1748 /**
1749 * inputPush:
1750 * @ctxt: an XML parser context
1751 * @value: the parser input
1752 *
1753 * Pushes a new parser input on top of the input stack
1754 *
1755 * Returns -1 in case of error, the index in the stack otherwise
1756 */
1757 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1758 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1759 {
1760 if ((ctxt == NULL) || (value == NULL))
1761 return(-1);
1762 if (ctxt->inputNr >= ctxt->inputMax) {
1763 ctxt->inputMax *= 2;
1764 ctxt->inputTab =
1765 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1766 ctxt->inputMax *
1767 sizeof(ctxt->inputTab[0]));
1768 if (ctxt->inputTab == NULL) {
1769 xmlErrMemory(ctxt, NULL);
1770 xmlFreeInputStream(value);
1771 ctxt->inputMax /= 2;
1772 value = NULL;
1773 return (-1);
1774 }
1775 }
1776 ctxt->inputTab[ctxt->inputNr] = value;
1777 ctxt->input = value;
1778 return (ctxt->inputNr++);
1779 }
1780 /**
1781 * inputPop:
1782 * @ctxt: an XML parser context
1783 *
1784 * Pops the top parser input from the input stack
1785 *
1786 * Returns the input just removed
1787 */
1788 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1789 inputPop(xmlParserCtxtPtr ctxt)
1790 {
1791 xmlParserInputPtr ret;
1792
1793 if (ctxt == NULL)
1794 return(NULL);
1795 if (ctxt->inputNr <= 0)
1796 return (NULL);
1797 ctxt->inputNr--;
1798 if (ctxt->inputNr > 0)
1799 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1800 else
1801 ctxt->input = NULL;
1802 ret = ctxt->inputTab[ctxt->inputNr];
1803 ctxt->inputTab[ctxt->inputNr] = NULL;
1804 return (ret);
1805 }
1806 /**
1807 * nodePush:
1808 * @ctxt: an XML parser context
1809 * @value: the element node
1810 *
1811 * Pushes a new element node on top of the node stack
1812 *
1813 * Returns -1 in case of error, the index in the stack otherwise
1814 */
1815 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1816 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1817 {
1818 if (ctxt == NULL) return(0);
1819 if (ctxt->nodeNr >= ctxt->nodeMax) {
1820 xmlNodePtr *tmp;
1821
1822 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1823 ctxt->nodeMax * 2 *
1824 sizeof(ctxt->nodeTab[0]));
1825 if (tmp == NULL) {
1826 xmlErrMemory(ctxt, NULL);
1827 return (-1);
1828 }
1829 ctxt->nodeTab = tmp;
1830 ctxt->nodeMax *= 2;
1831 }
1832 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1833 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1834 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1835 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1836 xmlParserMaxDepth);
1837 xmlHaltParser(ctxt);
1838 return(-1);
1839 }
1840 ctxt->nodeTab[ctxt->nodeNr] = value;
1841 ctxt->node = value;
1842 return (ctxt->nodeNr++);
1843 }
1844
1845 /**
1846 * nodePop:
1847 * @ctxt: an XML parser context
1848 *
1849 * Pops the top element node from the node stack
1850 *
1851 * Returns the node just removed
1852 */
1853 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1854 nodePop(xmlParserCtxtPtr ctxt)
1855 {
1856 xmlNodePtr ret;
1857
1858 if (ctxt == NULL) return(NULL);
1859 if (ctxt->nodeNr <= 0)
1860 return (NULL);
1861 ctxt->nodeNr--;
1862 if (ctxt->nodeNr > 0)
1863 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1864 else
1865 ctxt->node = NULL;
1866 ret = ctxt->nodeTab[ctxt->nodeNr];
1867 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1868 return (ret);
1869 }
1870
1871 /**
1872 * nameNsPush:
1873 * @ctxt: an XML parser context
1874 * @value: the element name
1875 * @prefix: the element prefix
1876 * @URI: the element namespace name
1877 * @line: the current line number for error messages
1878 * @nsNr: the number of namespaces pushed on the namespace table
1879 *
1880 * Pushes a new element name/prefix/URL on top of the name stack
1881 *
1882 * Returns -1 in case of error, the index in the stack otherwise
1883 */
1884 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1885 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1886 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1887 {
1888 xmlStartTag *tag;
1889
1890 if (ctxt->nameNr >= ctxt->nameMax) {
1891 const xmlChar * *tmp;
1892 xmlStartTag *tmp2;
1893 ctxt->nameMax *= 2;
1894 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1895 ctxt->nameMax *
1896 sizeof(ctxt->nameTab[0]));
1897 if (tmp == NULL) {
1898 ctxt->nameMax /= 2;
1899 goto mem_error;
1900 }
1901 ctxt->nameTab = tmp;
1902 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1903 ctxt->nameMax *
1904 sizeof(ctxt->pushTab[0]));
1905 if (tmp2 == NULL) {
1906 ctxt->nameMax /= 2;
1907 goto mem_error;
1908 }
1909 ctxt->pushTab = tmp2;
1910 } else if (ctxt->pushTab == NULL) {
1911 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1912 sizeof(ctxt->pushTab[0]));
1913 if (ctxt->pushTab == NULL)
1914 goto mem_error;
1915 }
1916 ctxt->nameTab[ctxt->nameNr] = value;
1917 ctxt->name = value;
1918 tag = &ctxt->pushTab[ctxt->nameNr];
1919 tag->prefix = prefix;
1920 tag->URI = URI;
1921 tag->line = line;
1922 tag->nsNr = nsNr;
1923 return (ctxt->nameNr++);
1924 mem_error:
1925 xmlErrMemory(ctxt, NULL);
1926 return (-1);
1927 }
1928 #ifdef LIBXML_PUSH_ENABLED
1929 /**
1930 * nameNsPop:
1931 * @ctxt: an XML parser context
1932 *
1933 * Pops the top element/prefix/URI name from the name stack
1934 *
1935 * Returns the name just removed
1936 */
1937 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1938 nameNsPop(xmlParserCtxtPtr ctxt)
1939 {
1940 const xmlChar *ret;
1941
1942 if (ctxt->nameNr <= 0)
1943 return (NULL);
1944 ctxt->nameNr--;
1945 if (ctxt->nameNr > 0)
1946 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1947 else
1948 ctxt->name = NULL;
1949 ret = ctxt->nameTab[ctxt->nameNr];
1950 ctxt->nameTab[ctxt->nameNr] = NULL;
1951 return (ret);
1952 }
1953 #endif /* LIBXML_PUSH_ENABLED */
1954
1955 /**
1956 * namePush:
1957 * @ctxt: an XML parser context
1958 * @value: the element name
1959 *
1960 * Pushes a new element name on top of the name stack
1961 *
1962 * Returns -1 in case of error, the index in the stack otherwise
1963 */
1964 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1965 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1966 {
1967 if (ctxt == NULL) return (-1);
1968
1969 if (ctxt->nameNr >= ctxt->nameMax) {
1970 const xmlChar * *tmp;
1971 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1972 ctxt->nameMax * 2 *
1973 sizeof(ctxt->nameTab[0]));
1974 if (tmp == NULL) {
1975 goto mem_error;
1976 }
1977 ctxt->nameTab = tmp;
1978 ctxt->nameMax *= 2;
1979 }
1980 ctxt->nameTab[ctxt->nameNr] = value;
1981 ctxt->name = value;
1982 return (ctxt->nameNr++);
1983 mem_error:
1984 xmlErrMemory(ctxt, NULL);
1985 return (-1);
1986 }
1987 /**
1988 * namePop:
1989 * @ctxt: an XML parser context
1990 *
1991 * Pops the top element name from the name stack
1992 *
1993 * Returns the name just removed
1994 */
1995 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1996 namePop(xmlParserCtxtPtr ctxt)
1997 {
1998 const xmlChar *ret;
1999
2000 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2001 return (NULL);
2002 ctxt->nameNr--;
2003 if (ctxt->nameNr > 0)
2004 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2005 else
2006 ctxt->name = NULL;
2007 ret = ctxt->nameTab[ctxt->nameNr];
2008 ctxt->nameTab[ctxt->nameNr] = NULL;
2009 return (ret);
2010 }
2011
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and makes ctxt->space point to
 * the new top entry. The stack is doubled when full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old table is still valid, restore its size */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;
    return(pos);
}
2030
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below it, or to the first entry of the table when the stack
 * becomes empty, and the slot which was popped is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0) return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2043
2044 /*
2045 * Macros for accessing the content. Those should be used only by the parser,
2046 * and not exported.
2047 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them
2050 *
2051 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2052 * To be used with extreme caution since operations consuming
2053 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2058 * RAW same as CUR but in the input buffer, bypass any token
2059 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
2062 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2063 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2066 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2067 *
2068 * NEXT Skip to the next character, this does the proper decoding
2069 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2070 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2071 * CUR_CHAR(l) returns the current unicode character (int), set l
2072 * to the number of xmlChars used for the encoding [0-5].
2073 * CUR_SCHAR same but operate on a string instead of the context
2074 * COPY_BUF copy the current unicode char to the target buffer, increment
2075 * the index
2076 * GROW, SHRINK handling of input buffers
2077 */
2078
/*
 * Direct accessors to the current input of the parser context. They all
 * expand to expressions using a variable named ctxt.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s are c1 ... cn.
 * The bytes are compared in order and the comparison stops at the first
 * mismatch. Every parameter is parenthesized so that any expression can
 * be passed as argument.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2102
/*
 * SKIP(val): advance the current pointer and the column by val, then
 * grow the input if a 0 byte is reached. Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance by val bytes one at a time, updating the line and
 * column for each of them, then grow the input if a 0 byte is reached.
 * val is parenthesized so that any expression can be passed as argument;
 * it is evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2120
2121 #define SHRINK if ((ctxt->progressive == 0) && \
2122 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2123 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2124 xmlSHRINK (ctxt);
2125
xmlSHRINK(xmlParserCtxtPtr ctxt)2126 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2127 xmlParserInputShrink(ctxt->input);
2128 if (*ctxt->input->cur == 0)
2129 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2130 }
2131
2132 #define GROW if ((ctxt->progressive == 0) && \
2133 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2134 xmlGROW (ctxt);
2135
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Worker behind the GROW macro.
 *
 * Unless XML_PARSE_HUGE is set, an input backed by a buffer whose read
 * callback is not xmlInputReadCallbackNop may not have more than
 * XML_MAX_LOOKUP_LIMIT bytes either before or after the cursor; in that
 * case a "Huge input lookup" error is raised and the parser is halted.
 * Otherwise the input is grown by INPUT_CHUNK; if the cursor then lies
 * outside [base, end] the parser is halted, and if the byte under the
 * cursor is 0 one more grow is attempted.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;
    int overLimit = ((ahead > XML_MAX_LOOKUP_LIMIT) ||
                     (behind > XML_MAX_LOOKUP_LIMIT));

    if (overLimit &&
        ((ctxt->input->buf) &&
         (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    /* sanity check: the cursor must still be inside the input window */
    if ((ctxt->input->cur < ctxt->input->base) ||
        (ctxt->input->end < ctxt->input->cur)) {
        /* the parser is halted before the error is reported */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if (ctxt->input->cur == NULL)
        return;
    if (ctxt->input->cur[0] == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2159
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte: bump the column and the cursor, then
 * call xmlParserInputGrow() if the byte under the new cursor is 0.
 * No line accounting is done.
 * Note: this expands to a plain brace block, not a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL: step over one character that is @l bytes long. The line/column
 * counters are updated from the byte under the cursor ('\n' starts a new
 * line, anything else adds one column) and the cursor then advances by @l.
 * Unlike NEXT1 and SKIP this never calls xmlParserInputGrow().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/*
 * CUR_CHAR / CUR_SCHAR: call xmlCurrentChar() / xmlStringCurrentChar(),
 * passing the address of @l as the last argument.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF: append the character @v to buffer @b at index @i.
 * When @l is 1 the value is stored as a single xmlChar and @i is
 * incremented; otherwise xmlCopyCharMultiByte() is called and @i is
 * advanced by its return value.
 * Note: this expands to an unbraced if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2184
2185 /**
2186 * xmlSkipBlankChars:
2187 * @ctxt: the XML parser context
2188 *
2189 * skip all blanks character found at that point in the input streams.
2190 * It pops up finished entities in the process if allowable at that point.
2191 *
2192 * Returns the number of space chars skipped
2193 */
2194
2195 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2196 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2197 int res = 0;
2198
2199 /*
2200 * It's Okay to use CUR/NEXT here since all the blanks are on
2201 * the ASCII range.
2202 */
2203 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2204 (ctxt->instate == XML_PARSER_START)) {
2205 const xmlChar *cur;
2206 /*
2207 * if we are in the document content, go really fast
2208 */
2209 cur = ctxt->input->cur;
2210 while (IS_BLANK_CH(*cur)) {
2211 if (*cur == '\n') {
2212 ctxt->input->line++; ctxt->input->col = 1;
2213 } else {
2214 ctxt->input->col++;
2215 }
2216 cur++;
2217 res++;
2218 if (*cur == 0) {
2219 ctxt->input->cur = cur;
2220 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2221 cur = ctxt->input->cur;
2222 }
2223 }
2224 ctxt->input->cur = cur;
2225 } else {
2226 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2227
2228 while (1) {
2229 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2230 NEXT;
2231 } else if (CUR == '%') {
2232 /*
2233 * Need to handle support of entities branching here
2234 */
2235 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2236 break;
2237 xmlParsePEReference(ctxt);
2238 } else if (CUR == 0) {
2239 if (ctxt->inputNr <= 1)
2240 break;
2241 xmlPopInput(ctxt);
2242 } else {
2243 break;
2244 }
2245
2246 /*
2247 * Also increase the counter when entering or exiting a PERef.
2248 * The spec says: "When a parameter-entity reference is recognized
2249 * in the DTD and included, its replacement text MUST be enlarged
2250 * by the attachment of one leading and one following space (#x20)
2251 * character."
2252 */
2253 res++;
2254 }
2255 }
2256 return(res);
2257 }
2258
2259 /************************************************************************
2260 * *
2261 * Commodity functions to handle entities *
2262 * *
2263 ************************************************************************/
2264
2265 /**
2266 * xmlPopInput:
2267 * @ctxt: an XML parser context
2268 *
2269 * xmlPopInput: the current input pointed by ctxt->input came to an end
2270 * pop it and return the next char.
2271 *
2272 * Returns the current xmlChar in the parser context
2273 */
2274 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2275 xmlPopInput(xmlParserCtxtPtr ctxt) {
2276 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2277 if (xmlParserDebugEntities)
2278 xmlGenericError(xmlGenericErrorContext,
2279 "Popping input %d\n", ctxt->inputNr);
2280 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2281 (ctxt->instate != XML_PARSER_EOF))
2282 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2283 "Unfinished entity outside the DTD");
2284 xmlFreeInputStream(inputPop(ctxt));
2285 if (*ctxt->input->cur == 0)
2286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2287 return(CUR);
2288 }
2289
2290 /**
2291 * xmlPushInput:
2292 * @ctxt: an XML parser context
2293 * @input: an XML parser input fragment (entity, XML fragment ...).
2294 *
2295 * xmlPushInput: switch to a new input stream which is stacked on top
2296 * of the previous one(s).
2297 * Returns -1 in case of error or the index in the input stack
2298 */
2299 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2300 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2301 int ret;
2302 if (input == NULL) return(-1);
2303
2304 if (xmlParserDebugEntities) {
2305 if ((ctxt->input != NULL) && (ctxt->input->filename))
2306 xmlGenericError(xmlGenericErrorContext,
2307 "%s(%d): ", ctxt->input->filename,
2308 ctxt->input->line);
2309 xmlGenericError(xmlGenericErrorContext,
2310 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2311 }
2312 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2313 (ctxt->inputNr > 1024)) {
2314 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2315 while (ctxt->inputNr > 1)
2316 xmlFreeInputStream(inputPop(ctxt));
2317 return(-1);
2318 }
2319 ret = inputPush(ctxt, input);
2320 if (ctxt->instate == XML_PARSER_EOF)
2321 return(-1);
2322 GROW;
2323 return(ret);
2324 }
2325
2326 /**
2327 * xmlParseCharRef:
2328 * @ctxt: an XML parser context
2329 *
2330 * parse Reference declarations
2331 *
2332 * [66] CharRef ::= '&#' [0-9]+ ';' |
2333 * '&#x' [0-9a-fA-F]+ ';'
2334 *
2335 * [ WFC: Legal Character ]
2336 * Characters referred to using character references must match the
2337 * production for Char.
2338 *
2339 * Returns the value parsed (as an int), 0 in case of error
2340 */
2341 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2342 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2343 int val = 0;
2344 int count = 0;
2345
2346 /*
2347 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2348 */
2349 if ((RAW == '&') && (NXT(1) == '#') &&
2350 (NXT(2) == 'x')) {
2351 SKIP(3);
2352 GROW;
2353 while (RAW != ';') { /* loop blocked by count */
2354 if (count++ > 20) {
2355 count = 0;
2356 GROW;
2357 if (ctxt->instate == XML_PARSER_EOF)
2358 return(0);
2359 }
2360 if ((RAW >= '0') && (RAW <= '9'))
2361 val = val * 16 + (CUR - '0');
2362 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2363 val = val * 16 + (CUR - 'a') + 10;
2364 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2365 val = val * 16 + (CUR - 'A') + 10;
2366 else {
2367 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2368 val = 0;
2369 break;
2370 }
2371 if (val > 0x110000)
2372 val = 0x110000;
2373
2374 NEXT;
2375 count++;
2376 }
2377 if (RAW == ';') {
2378 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2379 ctxt->input->col++;
2380 ctxt->input->cur++;
2381 }
2382 } else if ((RAW == '&') && (NXT(1) == '#')) {
2383 SKIP(2);
2384 GROW;
2385 while (RAW != ';') { /* loop blocked by count */
2386 if (count++ > 20) {
2387 count = 0;
2388 GROW;
2389 if (ctxt->instate == XML_PARSER_EOF)
2390 return(0);
2391 }
2392 if ((RAW >= '0') && (RAW <= '9'))
2393 val = val * 10 + (CUR - '0');
2394 else {
2395 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2396 val = 0;
2397 break;
2398 }
2399 if (val > 0x110000)
2400 val = 0x110000;
2401
2402 NEXT;
2403 count++;
2404 }
2405 if (RAW == ';') {
2406 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2407 ctxt->input->col++;
2408 ctxt->input->cur++;
2409 }
2410 } else {
2411 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2412 }
2413
2414 /*
2415 * [ WFC: Legal Character ]
2416 * Characters referred to using character references must match the
2417 * production for Char.
2418 */
2419 if (val >= 0x110000) {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseCharRef: character reference out of bounds\n",
2422 val);
2423 } else if (IS_CHAR(val)) {
2424 return(val);
2425 } else {
2426 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2427 "xmlParseCharRef: invalid xmlChar value %d\n",
2428 val);
2429 }
2430 return(0);
2431 }
2432
2433 /**
2434 * xmlParseStringCharRef:
2435 * @ctxt: an XML parser context
2436 * @str: a pointer to an index in the string
2437 *
2438 * parse Reference declarations, variant parsing from a string rather
2439 * than an an input flow.
2440 *
2441 * [66] CharRef ::= '&#' [0-9]+ ';' |
2442 * '&#x' [0-9a-fA-F]+ ';'
2443 *
2444 * [ WFC: Legal Character ]
2445 * Characters referred to using character references must match the
2446 * production for Char.
2447 *
2448 * Returns the value parsed (as an int), 0 in case of error, str will be
2449 * updated to the current value of the index
2450 */
2451 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2452 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2453 const xmlChar *ptr;
2454 xmlChar cur;
2455 int val = 0;
2456
2457 if ((str == NULL) || (*str == NULL)) return(0);
2458 ptr = *str;
2459 cur = *ptr;
2460 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2461 ptr += 3;
2462 cur = *ptr;
2463 while (cur != ';') { /* Non input consuming loop */
2464 if ((cur >= '0') && (cur <= '9'))
2465 val = val * 16 + (cur - '0');
2466 else if ((cur >= 'a') && (cur <= 'f'))
2467 val = val * 16 + (cur - 'a') + 10;
2468 else if ((cur >= 'A') && (cur <= 'F'))
2469 val = val * 16 + (cur - 'A') + 10;
2470 else {
2471 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2472 val = 0;
2473 break;
2474 }
2475 if (val > 0x110000)
2476 val = 0x110000;
2477
2478 ptr++;
2479 cur = *ptr;
2480 }
2481 if (cur == ';')
2482 ptr++;
2483 } else if ((cur == '&') && (ptr[1] == '#')){
2484 ptr += 2;
2485 cur = *ptr;
2486 while (cur != ';') { /* Non input consuming loops */
2487 if ((cur >= '0') && (cur <= '9'))
2488 val = val * 10 + (cur - '0');
2489 else {
2490 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2491 val = 0;
2492 break;
2493 }
2494 if (val > 0x110000)
2495 val = 0x110000;
2496
2497 ptr++;
2498 cur = *ptr;
2499 }
2500 if (cur == ';')
2501 ptr++;
2502 } else {
2503 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2504 return(0);
2505 }
2506 *str = ptr;
2507
2508 /*
2509 * [ WFC: Legal Character ]
2510 * Characters referred to using character references must match the
2511 * production for Char.
2512 */
2513 if (val >= 0x110000) {
2514 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2515 "xmlParseStringCharRef: character reference out of bounds\n",
2516 val);
2517 } else if (IS_CHAR(val)) {
2518 return(val);
2519 } else {
2520 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2521 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2522 val);
2523 }
2524 return(0);
2525 }
2526
2527 /**
2528 * xmlParserHandlePEReference:
2529 * @ctxt: the parser context
2530 *
2531 * [69] PEReference ::= '%' Name ';'
2532 *
2533 * [ WFC: No Recursion ]
2534 * A parsed entity must not contain a recursive
2535 * reference to itself, either directly or indirectly.
2536 *
2537 * [ WFC: Entity Declared ]
2538 * In a document without any DTD, a document with only an internal DTD
2539 * subset which contains no parameter entity references, or a document
2540 * with "standalone='yes'", ... ... The declaration of a parameter
2541 * entity must precede any reference to it...
2542 *
2543 * [ VC: Entity Declared ]
2544 * In a document with an external subset or external parameter entities
2545 * with "standalone='no'", ... ... The declaration of a parameter entity
2546 * must precede any reference to it...
2547 *
2548 * [ WFC: In DTD ]
2549 * Parameter-entity references may only appear in the DTD.
2550 * NOTE: misleading but this is handled.
2551 *
2552 * A PEReference may have been detected in the current input stream
2553 * the handling is done accordingly to
2554 * http://www.w3.org/TR/REC-xml#entproc
2555 * i.e.
2556 * - Included in literal in entity values
2557 * - Included as Parameter Entity reference within DTDs
2558 */
2559 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2560 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2561 switch(ctxt->instate) {
2562 case XML_PARSER_CDATA_SECTION:
2563 return;
2564 case XML_PARSER_COMMENT:
2565 return;
2566 case XML_PARSER_START_TAG:
2567 return;
2568 case XML_PARSER_END_TAG:
2569 return;
2570 case XML_PARSER_EOF:
2571 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2572 return;
2573 case XML_PARSER_PROLOG:
2574 case XML_PARSER_START:
2575 case XML_PARSER_MISC:
2576 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2577 return;
2578 case XML_PARSER_ENTITY_DECL:
2579 case XML_PARSER_CONTENT:
2580 case XML_PARSER_ATTRIBUTE_VALUE:
2581 case XML_PARSER_PI:
2582 case XML_PARSER_SYSTEM_LITERAL:
2583 case XML_PARSER_PUBLIC_LITERAL:
2584 /* we just ignore it there */
2585 return;
2586 case XML_PARSER_EPILOG:
2587 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2588 return;
2589 case XML_PARSER_ENTITY_VALUE:
2590 /*
2591 * NOTE: in the case of entity values, we don't do the
2592 * substitution here since we need the literal
2593 * entity value to be able to save the internal
2594 * subset of the document.
2595 * This will be handled by xmlStringDecodeEntities
2596 */
2597 return;
2598 case XML_PARSER_DTD:
2599 /*
2600 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2601 * In the internal DTD subset, parameter-entity references
2602 * can occur only where markup declarations can occur, not
2603 * within markup declarations.
2604 * In that case this is handled in xmlParseMarkupDecl
2605 */
2606 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2607 return;
2608 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2609 return;
2610 break;
2611 case XML_PARSER_IGNORE:
2612 return;
2613 }
2614
2615 xmlParsePEReference(ctxt);
2616 }
2617
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n extra bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is also
 * the target when the size computation wraps around.
 * @n is parenthesized so that expression arguments are added as a whole.
 * On reallocation failure the original buffer pointer is left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2632
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes a entity string content and process to do the adequate substitutions.
 * Character references are always decoded; entity references are replaced
 * only when @what contains XML_SUBSTITUTE_REF and parameter-entity
 * references only when it contains XML_SUBSTITUTE_PEREF. Replacement text
 * is itself decoded recursively, with ctxt->depth tracking the nesting.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, when
 *      the nesting limit is exceeded, on allocation failure or when a
 *      reference cannot be processed.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
	return(NULL);
    last = str + len;

    /*
     * Nesting guard: more than 40 levels without XML_PARSE_HUGE, or more
     * than 1024 levels in any case, is reported as an entity loop.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 1024)) {
	xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Throughout the loop the buffer is regrown as soon as fewer than
     * XML_PARSER_BUFFER_SIZE bytes of headroom remain, which is what lets
     * the individual appends below write without a prior bounds check.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
	   (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

	if (c == 0) break;
	/*
	 * NOTE(review): str[1] is read without comparing against 'last';
	 * this presumably relies on @str being 0 terminated - confirm.
	 */
        if ((c == '&') && (str[1] == '#')) {
	    /* character reference: xmlParseStringCharRef advances str */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    /* a length of 0 selects the xmlCopyCharMultiByte path */
	    COPY_BUF(0,buffer,nbchars,val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* predefined entity: only its first content byte is copied */
		if (ent->content != NULL) {
		    COPY_BUF(0,buffer,nbchars,ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
		/* entity with content: decode it recursively, then append */
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
		    /* decoding failed: the entity content is emptied */
                    ent->content[0] = 0;
                    goto int_error;
                }

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* a non-zero check result aborts the decoding */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/* entity without content: emit the reference "&name;" */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    xmlParserEntityCheck(ctxt, 0, ent, 0);
	    if (ent != NULL)
	        ctxt->nbentities += ent->checked / 2;
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEReferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}
		ctxt->depth++;
		rep = xmlStringDecodeEntities(ctxt, ent->content, what,
			                      0, 0, 0);
		ctxt->depth--;
		if (rep == NULL) {
		    /* decoding failed: empty the content if there is one */
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* ordinary character: copy it and step over its l bytes */
	    COPY_BUF(l,buffer,nbchars,c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* allocation failures report the error, then share the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2829
2830 /**
2831 * xmlStringDecodeEntities:
2832 * @ctxt: the parser context
2833 * @str: the input string
2834 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2835 * @end: an end marker xmlChar, 0 if none
2836 * @end2: an end marker xmlChar, 0 if none
2837 * @end3: an end marker xmlChar, 0 if none
2838 *
2839 * Takes a entity string content and process to do the adequate substitutions.
2840 *
2841 * [67] Reference ::= EntityRef | CharRef
2842 *
2843 * [69] PEReference ::= '%' Name ';'
2844 *
2845 * Returns A newly allocated string with the substitution done. The caller
2846 * must deallocate it !
2847 */
2848 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2849 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2850 xmlChar end, xmlChar end2, xmlChar end3) {
2851 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2852 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2853 end, end2, end3));
2854 }
2855
2856 /************************************************************************
2857 * *
2858 * Commodity functions, cleanup needed ? *
2859 * *
2860 ************************************************************************/
2861
2862 /**
2863 * areBlanks:
2864 * @ctxt: an XML parser context
2865 * @str: a xmlChar *
2866 * @len: the size of @str
2867 * @blank_chars: we know the chars are blanks
2868 *
2869 * Is this a sequence of blank chars that one can ignore ?
2870 *
2871 * Returns 1 if ignorable 0 otherwise.
2872 */
2873
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2874 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2875 int blank_chars) {
2876 int i, ret;
2877 xmlNodePtr lastChild;
2878
2879 /*
2880 * Don't spend time trying to differentiate them, the same callback is
2881 * used !
2882 */
2883 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2884 return(0);
2885
2886 /*
2887 * Check for xml:space value.
2888 */
2889 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2890 (*(ctxt->space) == -2))
2891 return(0);
2892
2893 /*
2894 * Check that the string is made of blanks
2895 */
2896 if (blank_chars == 0) {
2897 for (i = 0;i < len;i++)
2898 if (!(IS_BLANK_CH(str[i]))) return(0);
2899 }
2900
2901 /*
2902 * Look if the element is mixed content in the DTD if available
2903 */
2904 if (ctxt->node == NULL) return(0);
2905 if (ctxt->myDoc != NULL) {
2906 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2907 if (ret == 0) return(1);
2908 if (ret == 1) return(0);
2909 }
2910
2911 /*
2912 * Otherwise, heuristic :-\
2913 */
2914 if ((RAW != '<') && (RAW != 0xD)) return(0);
2915 if ((ctxt->node->children == NULL) &&
2916 (RAW == '<') && (NXT(1) == '/')) return(0);
2917
2918 lastChild = xmlGetLastChild(ctxt->node);
2919 if (lastChild == NULL) {
2920 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2921 (ctxt->node->content != NULL)) return(0);
2922 } else if (xmlNodeIsText(lastChild))
2923 return(0);
2924 else if ((ctxt->node->children != NULL) &&
2925 (xmlNodeIsText(ctxt->node->children)))
2926 return(0);
2927 return(1);
2928 }
2929
2930 /************************************************************************
2931 * *
2932 * Extra stuff for namespace support *
2933 * Relates to http://www.w3.org/TR/WD-xml-names *
2934 * *
2935 ************************************************************************/
2936
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the UTF8 encoded qualified name to split
 * @prefix:  a xmlChar **
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Names starting with "xml:" (unless XML_XML_NAMESPACE is defined), names
 * starting with ':' and names ending with ':' are not split: a copy of the
 * whole @name is returned and *@prefix stays NULL.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. NULL is returned when @prefix or @name is
 *   NULL or when an allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    xmlChar *buffer = NULL;
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
	return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
	return(xmlStrdup(name));

    /*
     * First stage: collect everything up to the first ':' (or the end of
     * the string) in the stack buffer, as long as it fits.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
	buf[len++] = c;
	c = *cur++;
    }
    if (len >= max) {
	/*
	 * Okay someone managed to make a huge name, so he's ready to pay
	 * for the processing speed.
	 * Continue in a heap buffer which is doubled whenever fewer than
	 * 10 bytes of room are left.
	 */
	max = len * 2;

	buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	if (buffer == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    return(NULL);
	}
	memcpy(buffer, buf, len);
	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
	    if (len + 10 > max) {
	        xmlChar *tmp;

		max *= 2;
		tmp = (xmlChar *) xmlRealloc(buffer,
						max * sizeof(xmlChar));
		if (tmp == NULL) {
		    xmlFree(buffer);
		    xmlErrMemory(ctxt, NULL);
		    return(NULL);
		}
		buffer = tmp;
	    }
	    buffer[len++] = c;
	    c = *cur++;
	}
	buffer[len] = 0;
    }

    /* a trailing ':' with nothing after it: return the name unsplit */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
	    xmlFree(buffer);
	*prefix = NULL;
	return(xmlStrdup(name));
    }

    /*
     * ret now holds the first part: the whole name if there is no ':',
     * the prefix otherwise. A heap buffer is handed over as is and the
     * stack buffer limit is restored for the second stage.
     */
    if (buffer == NULL)
	ret = xmlStrndup(buf, len);
    else {
	ret = buffer;
	buffer = NULL;
	max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
	/* second stage: what was collected is the prefix, now read the local part */
	c = *cur;
        *prefix = ret;
	if (c == 0) {
	    return(xmlStrndup(BAD_CAST "", 0));
	}
	len = 0;

	/*
	 * Check that the first character is proper to start
	 * a new name
	 */
	if (!(((c >= 0x61) && (c <= 0x7A)) ||
	      ((c >= 0x41) && (c <= 0x5A)) ||
	      (c == '_') || (c == ':'))) {
	    int l;
	    int first = CUR_SCHAR(cur, l);

	    /* the error is reported but the split still goes on */
	    if (!IS_LETTER(first) && (first != '_')) {
		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
			    "Name %s is not XML Namespace compliant\n",
				  name);
	    }
	}
	cur++;

	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
	    buf[len++] = c;
	    c = *cur++;
	}
	if (len >= max) {
	    /*
	     * Okay someone managed to make a huge name, so he's ready to pay
	     * for the processing speed.
	     */
	    max = len * 2;

	    buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
	    if (buffer == NULL) {
	        /* note: *prefix keeps pointing to the prefix string here */
	        xmlErrMemory(ctxt, NULL);
		return(NULL);
	    }
	    memcpy(buffer, buf, len);
	    while (c != 0) { /* tested bigname2.xml */
		if (len + 10 > max) {
		    xmlChar *tmp;

		    max *= 2;
		    tmp = (xmlChar *) xmlRealloc(buffer,
						    max * sizeof(xmlChar));
		    if (tmp == NULL) {
			xmlErrMemory(ctxt, NULL);
			xmlFree(buffer);
			return(NULL);
		    }
		    buffer = tmp;
		}
		buffer[len++] = c;
		c = *cur++;
	    }
	    buffer[len] = 0;
	}

	if (buffer == NULL)
	    ret = xmlStrndup(buf, len);
	else {
	    ret = buffer;
	}
    }

    return(ret);
}
3107
3108 /************************************************************************
3109 * *
3110 * The parser itself *
3111 * Relates to http://www.w3.org/TR/REC-xml *
3112 * *
3113 ************************************************************************/
3114
3115 /************************************************************************
3116 * *
3117 * Routines to parse Name, NCName and NmToken *
3118 * *
3119 ************************************************************************/
/*
 * Counters compiled in only when DEBUG is defined; one per name parsing
 * routine (nbParseNameComplex, for instance, is incremented on entry to
 * xmlParseNameComplex).
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3128
3129 /*
3130 * The two following functions are related to the change of accepted
3131 * characters for Name and NmToken in the Revision 5 of XML-1.0
3132 * They correspond to the modified production [4] and the new production [4a]
3133 * changes in that revision. Also note that the macros used for the
3134 * productions Letter, Digit, CombiningChar and Extender are not needed
3135 * anymore.
3136 * We still keep compatibility to pre-revision5 parsing semantic if the
3137 * new XML_PARSE_OLD10 option is given to the parser.
3138 */
3139 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3140 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3141 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3142 /*
3143 * Use the new checks of production [4] [4a] amd [5] of the
3144 * Update 5 of XML-1.0
3145 */
3146 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3147 (((c >= 'a') && (c <= 'z')) ||
3148 ((c >= 'A') && (c <= 'Z')) ||
3149 (c == '_') || (c == ':') ||
3150 ((c >= 0xC0) && (c <= 0xD6)) ||
3151 ((c >= 0xD8) && (c <= 0xF6)) ||
3152 ((c >= 0xF8) && (c <= 0x2FF)) ||
3153 ((c >= 0x370) && (c <= 0x37D)) ||
3154 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3155 ((c >= 0x200C) && (c <= 0x200D)) ||
3156 ((c >= 0x2070) && (c <= 0x218F)) ||
3157 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3158 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3159 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3160 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3161 ((c >= 0x10000) && (c <= 0xEFFFF))))
3162 return(1);
3163 } else {
3164 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3165 return(1);
3166 }
3167 return(0);
3168 }
3169
3170 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3171 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3172 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173 /*
3174 * Use the new checks of production [4] [4a] amd [5] of the
3175 * Update 5 of XML-1.0
3176 */
3177 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3178 (((c >= 'a') && (c <= 'z')) ||
3179 ((c >= 'A') && (c <= 'Z')) ||
3180 ((c >= '0') && (c <= '9')) || /* !start */
3181 (c == '_') || (c == ':') ||
3182 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3183 ((c >= 0xC0) && (c <= 0xD6)) ||
3184 ((c >= 0xD8) && (c <= 0xF6)) ||
3185 ((c >= 0xF8) && (c <= 0x2FF)) ||
3186 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3187 ((c >= 0x370) && (c <= 0x37D)) ||
3188 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3189 ((c >= 0x200C) && (c <= 0x200D)) ||
3190 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3191 ((c >= 0x2070) && (c <= 0x218F)) ||
3192 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3193 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3194 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3195 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3196 ((c >= 0x10000) && (c <= 0xEFFFF))))
3197 return(1);
3198 } else {
3199 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3200 (c == '.') || (c == '-') ||
3201 (c == '_') || (c == ':') ||
3202 (IS_COMBINING(c)) ||
3203 (IS_EXTENDER(c)))
3204 return(1);
3205 }
3206 return(0);
3207 }
3208
/*
 * Forward declaration: the function has internal linkage, so its
 * definition has to live elsewhere in this file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3211
3212 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3213 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3214 int len = 0, l;
3215 int c;
3216 int count = 0;
3217 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3218 XML_MAX_TEXT_LENGTH :
3219 XML_MAX_NAME_LENGTH;
3220
3221 #ifdef DEBUG
3222 nbParseNameComplex++;
3223 #endif
3224
3225 /*
3226 * Handler for more complex cases
3227 */
3228 GROW;
3229 if (ctxt->instate == XML_PARSER_EOF)
3230 return(NULL);
3231 c = CUR_CHAR(l);
3232 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3233 /*
3234 * Use the new checks of production [4] [4a] amd [5] of the
3235 * Update 5 of XML-1.0
3236 */
3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 (!(((c >= 'a') && (c <= 'z')) ||
3239 ((c >= 'A') && (c <= 'Z')) ||
3240 (c == '_') || (c == ':') ||
3241 ((c >= 0xC0) && (c <= 0xD6)) ||
3242 ((c >= 0xD8) && (c <= 0xF6)) ||
3243 ((c >= 0xF8) && (c <= 0x2FF)) ||
3244 ((c >= 0x370) && (c <= 0x37D)) ||
3245 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3246 ((c >= 0x200C) && (c <= 0x200D)) ||
3247 ((c >= 0x2070) && (c <= 0x218F)) ||
3248 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3249 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3250 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3251 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3252 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3253 return(NULL);
3254 }
3255 len += l;
3256 NEXTL(l);
3257 c = CUR_CHAR(l);
3258 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3259 (((c >= 'a') && (c <= 'z')) ||
3260 ((c >= 'A') && (c <= 'Z')) ||
3261 ((c >= '0') && (c <= '9')) || /* !start */
3262 (c == '_') || (c == ':') ||
3263 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3264 ((c >= 0xC0) && (c <= 0xD6)) ||
3265 ((c >= 0xD8) && (c <= 0xF6)) ||
3266 ((c >= 0xF8) && (c <= 0x2FF)) ||
3267 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3268 ((c >= 0x370) && (c <= 0x37D)) ||
3269 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3270 ((c >= 0x200C) && (c <= 0x200D)) ||
3271 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3272 ((c >= 0x2070) && (c <= 0x218F)) ||
3273 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3274 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3275 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3276 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3277 ((c >= 0x10000) && (c <= 0xEFFFF))
3278 )) {
3279 if (count++ > XML_PARSER_CHUNK_SIZE) {
3280 count = 0;
3281 GROW;
3282 if (ctxt->instate == XML_PARSER_EOF)
3283 return(NULL);
3284 }
3285 if (len <= INT_MAX - l)
3286 len += l;
3287 NEXTL(l);
3288 c = CUR_CHAR(l);
3289 }
3290 } else {
3291 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3292 (!IS_LETTER(c) && (c != '_') &&
3293 (c != ':'))) {
3294 return(NULL);
3295 }
3296 len += l;
3297 NEXTL(l);
3298 c = CUR_CHAR(l);
3299
3300 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3301 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3302 (c == '.') || (c == '-') ||
3303 (c == '_') || (c == ':') ||
3304 (IS_COMBINING(c)) ||
3305 (IS_EXTENDER(c)))) {
3306 if (count++ > XML_PARSER_CHUNK_SIZE) {
3307 count = 0;
3308 GROW;
3309 if (ctxt->instate == XML_PARSER_EOF)
3310 return(NULL);
3311 }
3312 if (len <= INT_MAX - l)
3313 len += l;
3314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 }
3317 }
3318 if (len > maxLength) {
3319 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3320 return(NULL);
3321 }
3322 if (ctxt->input->cur - ctxt->input->base < len) {
3323 /*
3324 * There were a couple of bugs where PERefs lead to to a change
3325 * of the buffer. Check the buffer size to avoid passing an invalid
3326 * pointer to xmlDictLookup.
3327 */
3328 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3329 "unexpected change of input buffer");
3330 return (NULL);
3331 }
3332 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3333 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3334 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3335 }
3336
3337 /**
3338 * xmlParseName:
3339 * @ctxt: an XML parser context
3340 *
3341 * parse an XML name.
3342 *
3343 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3344 * CombiningChar | Extender
3345 *
3346 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3347 *
3348 * [6] Names ::= Name (#x20 Name)*
3349 *
3350 * Returns the Name parsed or NULL
3351 */
3352
3353 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3354 xmlParseName(xmlParserCtxtPtr ctxt) {
3355 const xmlChar *in;
3356 const xmlChar *ret;
3357 size_t count = 0;
3358 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3359 XML_MAX_TEXT_LENGTH :
3360 XML_MAX_NAME_LENGTH;
3361
3362 GROW;
3363
3364 #ifdef DEBUG
3365 nbParseName++;
3366 #endif
3367
3368 /*
3369 * Accelerator for simple ASCII names
3370 */
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3375 in++;
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
3379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
3381 in++;
3382 if ((*in > 0) && (*in < 0x80)) {
3383 count = in - ctxt->input->cur;
3384 if (count > maxLength) {
3385 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386 return(NULL);
3387 }
3388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 ctxt->input->cur = in;
3390 ctxt->input->col += count;
3391 if (ret == NULL)
3392 xmlErrMemory(ctxt, NULL);
3393 return(ret);
3394 }
3395 }
3396 /* accelerator for special cases */
3397 return(xmlParseNameComplex(ctxt));
3398 }
3399
3400 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3401 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402 int len = 0, l;
3403 int c;
3404 int count = 0;
3405 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406 XML_MAX_TEXT_LENGTH :
3407 XML_MAX_NAME_LENGTH;
3408 size_t startPosition = 0;
3409
3410 #ifdef DEBUG
3411 nbParseNCNameComplex++;
3412 #endif
3413
3414 /*
3415 * Handler for more complex cases
3416 */
3417 GROW;
3418 startPosition = CUR_PTR - BASE_PTR;
3419 c = CUR_CHAR(l);
3420 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3421 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3422 return(NULL);
3423 }
3424
3425 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3426 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3427 if (count++ > XML_PARSER_CHUNK_SIZE) {
3428 count = 0;
3429 GROW;
3430 if (ctxt->instate == XML_PARSER_EOF)
3431 return(NULL);
3432 }
3433 if (len <= INT_MAX - l)
3434 len += l;
3435 NEXTL(l);
3436 c = CUR_CHAR(l);
3437 if (c == 0) {
3438 count = 0;
3439 /*
3440 * when shrinking to extend the buffer we really need to preserve
3441 * the part of the name we already parsed. Hence rolling back
3442 * by current length.
3443 */
3444 ctxt->input->cur -= l;
3445 GROW;
3446 if (ctxt->instate == XML_PARSER_EOF)
3447 return(NULL);
3448 ctxt->input->cur += l;
3449 c = CUR_CHAR(l);
3450 }
3451 }
3452 if (len > maxLength) {
3453 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3454 return(NULL);
3455 }
3456 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3457 }
3458
3459 /**
3460 * xmlParseNCName:
3461 * @ctxt: an XML parser context
3462 * @len: length of the string parsed
3463 *
3464 * parse an XML name.
3465 *
3466 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3467 * CombiningChar | Extender
3468 *
3469 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3470 *
3471 * Returns the Name parsed or NULL
3472 */
3473
3474 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3475 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3476 const xmlChar *in, *e;
3477 const xmlChar *ret;
3478 size_t count = 0;
3479 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3480 XML_MAX_TEXT_LENGTH :
3481 XML_MAX_NAME_LENGTH;
3482
3483 #ifdef DEBUG
3484 nbParseNCName++;
3485 #endif
3486
3487 /*
3488 * Accelerator for simple ASCII names
3489 */
3490 in = ctxt->input->cur;
3491 e = ctxt->input->end;
3492 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3493 ((*in >= 0x41) && (*in <= 0x5A)) ||
3494 (*in == '_')) && (in < e)) {
3495 in++;
3496 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3497 ((*in >= 0x41) && (*in <= 0x5A)) ||
3498 ((*in >= 0x30) && (*in <= 0x39)) ||
3499 (*in == '_') || (*in == '-') ||
3500 (*in == '.')) && (in < e))
3501 in++;
3502 if (in >= e)
3503 goto complex;
3504 if ((*in > 0) && (*in < 0x80)) {
3505 count = in - ctxt->input->cur;
3506 if (count > maxLength) {
3507 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3508 return(NULL);
3509 }
3510 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3511 ctxt->input->cur = in;
3512 ctxt->input->col += count;
3513 if (ret == NULL) {
3514 xmlErrMemory(ctxt, NULL);
3515 }
3516 return(ret);
3517 }
3518 }
3519 complex:
3520 return(xmlParseNCNameComplex(ctxt));
3521 }
3522
3523 /**
3524 * xmlParseNameAndCompare:
3525 * @ctxt: an XML parser context
3526 *
3527 * parse an XML name and compares for match
3528 * (specialized for endtag parsing)
3529 *
3530 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3531 * and the name for mismatch
3532 */
3533
3534 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3535 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3536 register const xmlChar *cmp = other;
3537 register const xmlChar *in;
3538 const xmlChar *ret;
3539
3540 GROW;
3541 if (ctxt->instate == XML_PARSER_EOF)
3542 return(NULL);
3543
3544 in = ctxt->input->cur;
3545 while (*in != 0 && *in == *cmp) {
3546 ++in;
3547 ++cmp;
3548 }
3549 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3550 /* success */
3551 ctxt->input->col += in - ctxt->input->cur;
3552 ctxt->input->cur = in;
3553 return (const xmlChar*) 1;
3554 }
3555 /* failure (or end of input buffer), check with full function */
3556 ret = xmlParseName (ctxt);
3557 /* strings coming from the dictionary direct compare possible */
3558 if (ret == other) {
3559 return (const xmlChar*) 1;
3560 }
3561 return ret;
3562 }
3563
3564 /**
3565 * xmlParseStringName:
3566 * @ctxt: an XML parser context
3567 * @str: a pointer to the string pointer (IN/OUT)
3568 *
3569 * parse an XML name.
3570 *
3571 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3572 * CombiningChar | Extender
3573 *
3574 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3575 *
3576 * [6] Names ::= Name (#x20 Name)*
3577 *
3578 * Returns the Name parsed or NULL. The @str pointer
3579 * is updated to the current location in the string.
3580 */
3581
3582 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3583 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3584 xmlChar buf[XML_MAX_NAMELEN + 5];
3585 const xmlChar *cur = *str;
3586 int len = 0, l;
3587 int c;
3588 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3589 XML_MAX_TEXT_LENGTH :
3590 XML_MAX_NAME_LENGTH;
3591
3592 #ifdef DEBUG
3593 nbParseStringName++;
3594 #endif
3595
3596 c = CUR_SCHAR(cur, l);
3597 if (!xmlIsNameStartChar(ctxt, c)) {
3598 return(NULL);
3599 }
3600
3601 COPY_BUF(l,buf,len,c);
3602 cur += l;
3603 c = CUR_SCHAR(cur, l);
3604 while (xmlIsNameChar(ctxt, c)) {
3605 COPY_BUF(l,buf,len,c);
3606 cur += l;
3607 c = CUR_SCHAR(cur, l);
3608 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3609 /*
3610 * Okay someone managed to make a huge name, so he's ready to pay
3611 * for the processing speed.
3612 */
3613 xmlChar *buffer;
3614 int max = len * 2;
3615
3616 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3617 if (buffer == NULL) {
3618 xmlErrMemory(ctxt, NULL);
3619 return(NULL);
3620 }
3621 memcpy(buffer, buf, len);
3622 while (xmlIsNameChar(ctxt, c)) {
3623 if (len + 10 > max) {
3624 xmlChar *tmp;
3625
3626 max *= 2;
3627 tmp = (xmlChar *) xmlRealloc(buffer,
3628 max * sizeof(xmlChar));
3629 if (tmp == NULL) {
3630 xmlErrMemory(ctxt, NULL);
3631 xmlFree(buffer);
3632 return(NULL);
3633 }
3634 buffer = tmp;
3635 }
3636 COPY_BUF(l,buffer,len,c);
3637 cur += l;
3638 c = CUR_SCHAR(cur, l);
3639 if (len > maxLength) {
3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641 xmlFree(buffer);
3642 return(NULL);
3643 }
3644 }
3645 buffer[len] = 0;
3646 *str = cur;
3647 return(buffer);
3648 }
3649 }
3650 if (len > maxLength) {
3651 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3652 return(NULL);
3653 }
3654 *str = cur;
3655 return(xmlStrndup(buf, len));
3656 }
3657
3658 /**
3659 * xmlParseNmtoken:
3660 * @ctxt: an XML parser context
3661 *
3662 * parse an XML Nmtoken.
3663 *
3664 * [7] Nmtoken ::= (NameChar)+
3665 *
3666 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3667 *
3668 * Returns the Nmtoken parsed or NULL
3669 */
3670
3671 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3672 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3673 xmlChar buf[XML_MAX_NAMELEN + 5];
3674 int len = 0, l;
3675 int c;
3676 int count = 0;
3677 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678 XML_MAX_TEXT_LENGTH :
3679 XML_MAX_NAME_LENGTH;
3680
3681 #ifdef DEBUG
3682 nbParseNmToken++;
3683 #endif
3684
3685 GROW;
3686 if (ctxt->instate == XML_PARSER_EOF)
3687 return(NULL);
3688 c = CUR_CHAR(l);
3689
3690 while (xmlIsNameChar(ctxt, c)) {
3691 if (count++ > XML_PARSER_CHUNK_SIZE) {
3692 count = 0;
3693 GROW;
3694 }
3695 COPY_BUF(l,buf,len,c);
3696 NEXTL(l);
3697 c = CUR_CHAR(l);
3698 if (c == 0) {
3699 count = 0;
3700 GROW;
3701 if (ctxt->instate == XML_PARSER_EOF)
3702 return(NULL);
3703 c = CUR_CHAR(l);
3704 }
3705 if (len >= XML_MAX_NAMELEN) {
3706 /*
3707 * Okay someone managed to make a huge token, so he's ready to pay
3708 * for the processing speed.
3709 */
3710 xmlChar *buffer;
3711 int max = len * 2;
3712
3713 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3714 if (buffer == NULL) {
3715 xmlErrMemory(ctxt, NULL);
3716 return(NULL);
3717 }
3718 memcpy(buffer, buf, len);
3719 while (xmlIsNameChar(ctxt, c)) {
3720 if (count++ > XML_PARSER_CHUNK_SIZE) {
3721 count = 0;
3722 GROW;
3723 if (ctxt->instate == XML_PARSER_EOF) {
3724 xmlFree(buffer);
3725 return(NULL);
3726 }
3727 }
3728 if (len + 10 > max) {
3729 xmlChar *tmp;
3730
3731 max *= 2;
3732 tmp = (xmlChar *) xmlRealloc(buffer,
3733 max * sizeof(xmlChar));
3734 if (tmp == NULL) {
3735 xmlErrMemory(ctxt, NULL);
3736 xmlFree(buffer);
3737 return(NULL);
3738 }
3739 buffer = tmp;
3740 }
3741 COPY_BUF(l,buffer,len,c);
3742 NEXTL(l);
3743 c = CUR_CHAR(l);
3744 if (len > maxLength) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 xmlFree(buffer);
3747 return(NULL);
3748 }
3749 }
3750 buffer[len] = 0;
3751 return(buffer);
3752 }
3753 }
3754 if (len == 0)
3755 return(NULL);
3756 if (len > maxLength) {
3757 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3758 return(NULL);
3759 }
3760 return(xmlStrndup(buf, len));
3761 }
3762
3763 /**
3764 * xmlParseEntityValue:
3765 * @ctxt: an XML parser context
3766 * @orig: if non-NULL store a copy of the original entity value
3767 *
3768 * parse a value for ENTITY declarations
3769 *
3770 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771 * "'" ([^%&'] | PEReference | Reference)* "'"
3772 *
3773 * Returns the EntityValue parsed with reference substituted or NULL
3774 */
3775
3776 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3777 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778 xmlChar *buf = NULL;
3779 int len = 0;
3780 int size = XML_PARSER_BUFFER_SIZE;
3781 int c, l;
3782 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783 XML_MAX_HUGE_LENGTH :
3784 XML_MAX_TEXT_LENGTH;
3785 xmlChar stop;
3786 xmlChar *ret = NULL;
3787 const xmlChar *cur = NULL;
3788 xmlParserInputPtr input;
3789
3790 if (RAW == '"') stop = '"';
3791 else if (RAW == '\'') stop = '\'';
3792 else {
3793 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3794 return(NULL);
3795 }
3796 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3797 if (buf == NULL) {
3798 xmlErrMemory(ctxt, NULL);
3799 return(NULL);
3800 }
3801
3802 /*
3803 * The content of the entity definition is copied in a buffer.
3804 */
3805
3806 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807 input = ctxt->input;
3808 GROW;
3809 if (ctxt->instate == XML_PARSER_EOF)
3810 goto error;
3811 NEXT;
3812 c = CUR_CHAR(l);
3813 /*
3814 * NOTE: 4.4.5 Included in Literal
3815 * When a parameter entity reference appears in a literal entity
3816 * value, ... a single or double quote character in the replacement
3817 * text is always treated as a normal data character and will not
3818 * terminate the literal.
3819 * In practice it means we stop the loop only when back at parsing
3820 * the initial entity and the quote is found
3821 */
3822 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3824 if (len + 5 >= size) {
3825 xmlChar *tmp;
3826
3827 size *= 2;
3828 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3829 if (tmp == NULL) {
3830 xmlErrMemory(ctxt, NULL);
3831 goto error;
3832 }
3833 buf = tmp;
3834 }
3835 COPY_BUF(l,buf,len,c);
3836 NEXTL(l);
3837
3838 GROW;
3839 c = CUR_CHAR(l);
3840 if (c == 0) {
3841 GROW;
3842 c = CUR_CHAR(l);
3843 }
3844
3845 if (len > maxLength) {
3846 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847 "entity value too long\n");
3848 goto error;
3849 }
3850 }
3851 buf[len] = 0;
3852 if (ctxt->instate == XML_PARSER_EOF)
3853 goto error;
3854 if (c != stop) {
3855 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856 goto error;
3857 }
3858 NEXT;
3859
3860 /*
3861 * Raise problem w.r.t. '&' and '%' being used in non-entities
3862 * reference constructs. Note Charref will be handled in
3863 * xmlStringDecodeEntities()
3864 */
3865 cur = buf;
3866 while (*cur != 0) { /* non input consuming */
3867 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868 xmlChar *name;
3869 xmlChar tmp = *cur;
3870 int nameOk = 0;
3871
3872 cur++;
3873 name = xmlParseStringName(ctxt, &cur);
3874 if (name != NULL) {
3875 nameOk = 1;
3876 xmlFree(name);
3877 }
3878 if ((nameOk == 0) || (*cur != ';')) {
3879 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3880 "EntityValue: '%c' forbidden except for entities references\n",
3881 tmp);
3882 goto error;
3883 }
3884 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885 (ctxt->inputNr == 1)) {
3886 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3887 goto error;
3888 }
3889 if (*cur == 0)
3890 break;
3891 }
3892 cur++;
3893 }
3894
3895 /*
3896 * Then PEReference entities are substituted.
3897 *
3898 * NOTE: 4.4.7 Bypassed
3899 * When a general entity reference appears in the EntityValue in
3900 * an entity declaration, it is bypassed and left as is.
3901 * so XML_SUBSTITUTE_REF is not set here.
3902 */
3903 ++ctxt->depth;
3904 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905 0, 0, 0);
3906 --ctxt->depth;
3907 if (orig != NULL) {
3908 *orig = buf;
3909 buf = NULL;
3910 }
3911
3912 error:
3913 if (buf != NULL)
3914 xmlFree(buf);
3915 return(ret);
3916 }
3917
3918 /**
3919 * xmlParseAttValueComplex:
3920 * @ctxt: an XML parser context
3921 * @len: the resulting attribute len
3922 * @normalize: whether to apply the inner normalization
3923 *
3924 * parse a value for an attribute, this is the fallback function
3925 * of xmlParseAttValue() when the attribute parsing requires handling
3926 * of non-ASCII characters, or normalization compaction.
3927 *
3928 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929 */
3930 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3931 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3932 xmlChar limit = 0;
3933 xmlChar *buf = NULL;
3934 xmlChar *rep = NULL;
3935 size_t len = 0;
3936 size_t buf_size = 0;
3937 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938 XML_MAX_HUGE_LENGTH :
3939 XML_MAX_TEXT_LENGTH;
3940 int c, l, in_space = 0;
3941 xmlChar *current = NULL;
3942 xmlEntityPtr ent;
3943
3944 if (NXT(0) == '"') {
3945 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946 limit = '"';
3947 NEXT;
3948 } else if (NXT(0) == '\'') {
3949 limit = '\'';
3950 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951 NEXT;
3952 } else {
3953 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3954 return(NULL);
3955 }
3956
3957 /*
3958 * allocate a translation buffer.
3959 */
3960 buf_size = XML_PARSER_BUFFER_SIZE;
3961 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3962 if (buf == NULL) goto mem_error;
3963
3964 /*
3965 * OK loop until we reach one of the ending char or a size limit.
3966 */
3967 c = CUR_CHAR(l);
3968 while (((NXT(0) != limit) && /* checked */
3969 (IS_CHAR(c)) && (c != '<')) &&
3970 (ctxt->instate != XML_PARSER_EOF)) {
3971 if (c == '&') {
3972 in_space = 0;
3973 if (NXT(1) == '#') {
3974 int val = xmlParseCharRef(ctxt);
3975
3976 if (val == '&') {
3977 if (ctxt->replaceEntities) {
3978 if (len + 10 > buf_size) {
3979 growBuffer(buf, 10);
3980 }
3981 buf[len++] = '&';
3982 } else {
3983 /*
3984 * The reparsing will be done in xmlStringGetNodeList()
3985 * called by the attribute() function in SAX.c
3986 */
3987 if (len + 10 > buf_size) {
3988 growBuffer(buf, 10);
3989 }
3990 buf[len++] = '&';
3991 buf[len++] = '#';
3992 buf[len++] = '3';
3993 buf[len++] = '8';
3994 buf[len++] = ';';
3995 }
3996 } else if (val != 0) {
3997 if (len + 10 > buf_size) {
3998 growBuffer(buf, 10);
3999 }
4000 len += xmlCopyChar(0, &buf[len], val);
4001 }
4002 } else {
4003 ent = xmlParseEntityRef(ctxt);
4004 ctxt->nbentities++;
4005 if (ent != NULL)
4006 ctxt->nbentities += ent->owner;
4007 if ((ent != NULL) &&
4008 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4009 if (len + 10 > buf_size) {
4010 growBuffer(buf, 10);
4011 }
4012 if ((ctxt->replaceEntities == 0) &&
4013 (ent->content[0] == '&')) {
4014 buf[len++] = '&';
4015 buf[len++] = '#';
4016 buf[len++] = '3';
4017 buf[len++] = '8';
4018 buf[len++] = ';';
4019 } else {
4020 buf[len++] = ent->content[0];
4021 }
4022 } else if ((ent != NULL) &&
4023 (ctxt->replaceEntities != 0)) {
4024 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4025 ++ctxt->depth;
4026 rep = xmlStringDecodeEntities(ctxt, ent->content,
4027 XML_SUBSTITUTE_REF,
4028 0, 0, 0);
4029 --ctxt->depth;
4030 if (rep != NULL) {
4031 current = rep;
4032 while (*current != 0) { /* non input consuming */
4033 if ((*current == 0xD) || (*current == 0xA) ||
4034 (*current == 0x9)) {
4035 buf[len++] = 0x20;
4036 current++;
4037 } else
4038 buf[len++] = *current++;
4039 if (len + 10 > buf_size) {
4040 growBuffer(buf, 10);
4041 }
4042 }
4043 xmlFree(rep);
4044 rep = NULL;
4045 }
4046 } else {
4047 if (len + 10 > buf_size) {
4048 growBuffer(buf, 10);
4049 }
4050 if (ent->content != NULL)
4051 buf[len++] = ent->content[0];
4052 }
4053 } else if (ent != NULL) {
4054 int i = xmlStrlen(ent->name);
4055 const xmlChar *cur = ent->name;
4056
4057 /*
4058 * This may look absurd but is needed to detect
4059 * entities problems
4060 */
4061 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4062 (ent->content != NULL) && (ent->checked == 0)) {
4063 unsigned long oldnbent = ctxt->nbentities, diff;
4064
4065 ++ctxt->depth;
4066 rep = xmlStringDecodeEntities(ctxt, ent->content,
4067 XML_SUBSTITUTE_REF, 0, 0, 0);
4068 --ctxt->depth;
4069
4070 diff = ctxt->nbentities - oldnbent + 1;
4071 if (diff > INT_MAX / 2)
4072 diff = INT_MAX / 2;
4073 ent->checked = diff * 2;
4074 if (rep != NULL) {
4075 if (xmlStrchr(rep, '<'))
4076 ent->checked |= 1;
4077 xmlFree(rep);
4078 rep = NULL;
4079 } else {
4080 ent->content[0] = 0;
4081 }
4082 }
4083
4084 /*
4085 * Just output the reference
4086 */
4087 buf[len++] = '&';
4088 while (len + i + 10 > buf_size) {
4089 growBuffer(buf, i + 10);
4090 }
4091 for (;i > 0;i--)
4092 buf[len++] = *cur++;
4093 buf[len++] = ';';
4094 }
4095 }
4096 } else {
4097 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4098 if ((len != 0) || (!normalize)) {
4099 if ((!normalize) || (!in_space)) {
4100 COPY_BUF(l,buf,len,0x20);
4101 while (len + 10 > buf_size) {
4102 growBuffer(buf, 10);
4103 }
4104 }
4105 in_space = 1;
4106 }
4107 } else {
4108 in_space = 0;
4109 COPY_BUF(l,buf,len,c);
4110 if (len + 10 > buf_size) {
4111 growBuffer(buf, 10);
4112 }
4113 }
4114 NEXTL(l);
4115 }
4116 GROW;
4117 c = CUR_CHAR(l);
4118 if (len > maxLength) {
4119 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120 "AttValue length too long\n");
4121 goto mem_error;
4122 }
4123 }
4124 if (ctxt->instate == XML_PARSER_EOF)
4125 goto error;
4126
4127 if ((in_space) && (normalize)) {
4128 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4129 }
4130 buf[len] = 0;
4131 if (RAW == '<') {
4132 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4133 } else if (RAW != limit) {
4134 if ((c != 0) && (!IS_CHAR(c))) {
4135 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136 "invalid character in attribute value\n");
4137 } else {
4138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139 "AttValue: ' expected\n");
4140 }
4141 } else
4142 NEXT;
4143
4144 if (attlen != NULL) *attlen = (int) len;
4145 return(buf);
4146
4147 mem_error:
4148 xmlErrMemory(ctxt, NULL);
4149 error:
4150 if (buf != NULL)
4151 xmlFree(buf);
4152 if (rep != NULL)
4153 xmlFree(rep);
4154 return(NULL);
4155 }
4156
4157 /**
4158 * xmlParseAttValue:
4159 * @ctxt: an XML parser context
4160 *
4161 * parse a value for an attribute
4162 * Note: the parser won't do substitution of entities here, this
4163 * will be handled later in xmlStringGetNodeList
4164 *
4165 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4166 * "'" ([^<&'] | Reference)* "'"
4167 *
4168 * 3.3.3 Attribute-Value Normalization:
4169 * Before the value of an attribute is passed to the application or
4170 * checked for validity, the XML processor must normalize it as follows:
4171 * - a character reference is processed by appending the referenced
4172 * character to the attribute value
4173 * - an entity reference is processed by recursively processing the
4174 * replacement text of the entity
4175 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4176 * appending #x20 to the normalized value, except that only a single
4177 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4178 * parsed entity or the literal entity value of an internal parsed entity
4179 * - other characters are processed by appending them to the normalized value
4180 * If the declared value is not CDATA, then the XML processor must further
4181 * process the normalized attribute value by discarding any leading and
4182 * trailing space (#x20) characters, and by replacing sequences of space
4183 * (#x20) characters by a single space (#x20) character.
4184 * All attributes for which no declaration has been read should be treated
4185 * by a non-validating parser as if declared CDATA.
4186 *
4187 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4188 */
4189
4190
4191 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4192 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4193 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4194 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4195 }
4196
4197 /**
4198 * xmlParseSystemLiteral:
4199 * @ctxt: an XML parser context
4200 *
4201 * parse an XML Literal
4202 *
4203 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4204 *
4205 * Returns the SystemLiteral parsed or NULL
4206 */
4207
4208 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4209 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4210 xmlChar *buf = NULL;
4211 int len = 0;
4212 int size = XML_PARSER_BUFFER_SIZE;
4213 int cur, l;
4214 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4215 XML_MAX_TEXT_LENGTH :
4216 XML_MAX_NAME_LENGTH;
4217 xmlChar stop;
4218 int state = ctxt->instate;
4219 int count = 0;
4220
4221 SHRINK;
4222 if (RAW == '"') {
4223 NEXT;
4224 stop = '"';
4225 } else if (RAW == '\'') {
4226 NEXT;
4227 stop = '\'';
4228 } else {
4229 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4230 return(NULL);
4231 }
4232
4233 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4234 if (buf == NULL) {
4235 xmlErrMemory(ctxt, NULL);
4236 return(NULL);
4237 }
4238 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4239 cur = CUR_CHAR(l);
4240 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4241 if (len + 5 >= size) {
4242 xmlChar *tmp;
4243
4244 size *= 2;
4245 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4246 if (tmp == NULL) {
4247 xmlFree(buf);
4248 xmlErrMemory(ctxt, NULL);
4249 ctxt->instate = (xmlParserInputState) state;
4250 return(NULL);
4251 }
4252 buf = tmp;
4253 }
4254 count++;
4255 if (count > 50) {
4256 SHRINK;
4257 GROW;
4258 count = 0;
4259 if (ctxt->instate == XML_PARSER_EOF) {
4260 xmlFree(buf);
4261 return(NULL);
4262 }
4263 }
4264 COPY_BUF(l,buf,len,cur);
4265 NEXTL(l);
4266 cur = CUR_CHAR(l);
4267 if (cur == 0) {
4268 GROW;
4269 SHRINK;
4270 cur = CUR_CHAR(l);
4271 }
4272 if (len > maxLength) {
4273 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4274 xmlFree(buf);
4275 ctxt->instate = (xmlParserInputState) state;
4276 return(NULL);
4277 }
4278 }
4279 buf[len] = 0;
4280 ctxt->instate = (xmlParserInputState) state;
4281 if (!IS_CHAR(cur)) {
4282 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4283 } else {
4284 NEXT;
4285 }
4286 return(buf);
4287 }
4288
4289 /**
4290 * xmlParsePubidLiteral:
4291 * @ctxt: an XML parser context
4292 *
4293 * parse an XML public literal
4294 *
4295 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4296 *
4297 * Returns the PubidLiteral parsed or NULL.
4298 */
4299
4300 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4301 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4302 xmlChar *buf = NULL;
4303 int len = 0;
4304 int size = XML_PARSER_BUFFER_SIZE;
4305 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4306 XML_MAX_TEXT_LENGTH :
4307 XML_MAX_NAME_LENGTH;
4308 xmlChar cur;
4309 xmlChar stop;
4310 int count = 0;
4311 xmlParserInputState oldstate = ctxt->instate;
4312
4313 SHRINK;
4314 if (RAW == '"') {
4315 NEXT;
4316 stop = '"';
4317 } else if (RAW == '\'') {
4318 NEXT;
4319 stop = '\'';
4320 } else {
4321 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4322 return(NULL);
4323 }
4324 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4325 if (buf == NULL) {
4326 xmlErrMemory(ctxt, NULL);
4327 return(NULL);
4328 }
4329 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4330 cur = CUR;
4331 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4332 if (len + 1 >= size) {
4333 xmlChar *tmp;
4334
4335 size *= 2;
4336 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4337 if (tmp == NULL) {
4338 xmlErrMemory(ctxt, NULL);
4339 xmlFree(buf);
4340 return(NULL);
4341 }
4342 buf = tmp;
4343 }
4344 buf[len++] = cur;
4345 count++;
4346 if (count > 50) {
4347 SHRINK;
4348 GROW;
4349 count = 0;
4350 if (ctxt->instate == XML_PARSER_EOF) {
4351 xmlFree(buf);
4352 return(NULL);
4353 }
4354 }
4355 NEXT;
4356 cur = CUR;
4357 if (cur == 0) {
4358 GROW;
4359 SHRINK;
4360 cur = CUR;
4361 }
4362 if (len > maxLength) {
4363 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4364 xmlFree(buf);
4365 return(NULL);
4366 }
4367 }
4368 buf[len] = 0;
4369 if (cur != stop) {
4370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371 } else {
4372 NEXT;
4373 }
4374 ctxt->instate = oldstate;
4375 return(buf);
4376 }
4377
4378 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4379
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by a byte value.  An entry is equal to its own
 * index when the byte passes the test and 0x00 when it does not.  The
 * bytes which pass are TAB (0x9) and the range 0x20 - 0x7F, with the
 * exception of '&', '<' and ']'.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to the implicit zero initialization */
};
4417
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands chunks of it directly to the SAX characters /
 * ignorableWhitespace callbacks. As soon as a byte is met that the fast
 * path does not handle (or when @cdata is non-zero) the work is
 * delegated to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: refreshed after each chunk is delivered and
     * written back to the input right before falling back to the
     * complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a run of spaces and line feeds, tracking line/col */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces / line feeds were scanned since the last
                 * delivered chunk: report them (if any) and stop at '<'.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* advance the input before invoking the callback */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte flagged as plain data in test_char_data */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                /* a lone ']' is ordinary data */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what has been scanned since input->cur */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The chunk starts with a blank and the two callbacks
                     * differ: let areBlanks() pick the callback.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: restart the next chunk at the LF so the
                     * CR is not part of the delivered data.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back and leave it to the code below */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* refresh the input window; it may move the buffer */
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
            /* keep going while the next byte is ASCII data, TAB or LF */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /* restore the position of the last delivered chunk for the slow path */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4575
4576 /**
4577 * xmlParseCharDataComplex:
4578 * @ctxt: an XML parser context
4579 * @cdata: int indicating whether we are within a CDATA section
4580 *
4581 * parse a CharData section.this is the fallback function
4582 * of xmlParseCharData() when the parsing requires handling
4583 * of non-ASCII characters.
4584 */
4585 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4586 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4587 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4588 int nbchar = 0;
4589 int cur, l;
4590 int count = 0;
4591
4592 SHRINK;
4593 GROW;
4594 cur = CUR_CHAR(l);
4595 while ((cur != '<') && /* checked */
4596 (cur != '&') &&
4597 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4598 if ((cur == ']') && (NXT(1) == ']') &&
4599 (NXT(2) == '>')) {
4600 if (cdata) break;
4601 else {
4602 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4603 }
4604 }
4605 COPY_BUF(l,buf,nbchar,cur);
4606 /* move current position before possible calling of ctxt->sax->characters */
4607 NEXTL(l);
4608 cur = CUR_CHAR(l);
4609 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4610 buf[nbchar] = 0;
4611
4612 /*
4613 * OK the segment is to be consumed as chars.
4614 */
4615 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4616 if (areBlanks(ctxt, buf, nbchar, 0)) {
4617 if (ctxt->sax->ignorableWhitespace != NULL)
4618 ctxt->sax->ignorableWhitespace(ctxt->userData,
4619 buf, nbchar);
4620 } else {
4621 if (ctxt->sax->characters != NULL)
4622 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4623 if ((ctxt->sax->characters !=
4624 ctxt->sax->ignorableWhitespace) &&
4625 (*ctxt->space == -1))
4626 *ctxt->space = -2;
4627 }
4628 }
4629 nbchar = 0;
4630 /* something really bad happened in the SAX callback */
4631 if (ctxt->instate != XML_PARSER_CONTENT)
4632 return;
4633 }
4634 count++;
4635 if (count > 50) {
4636 SHRINK;
4637 GROW;
4638 count = 0;
4639 if (ctxt->instate == XML_PARSER_EOF)
4640 return;
4641 }
4642 }
4643 if (nbchar != 0) {
4644 buf[nbchar] = 0;
4645 /*
4646 * OK the segment is to be consumed as chars.
4647 */
4648 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4649 if (areBlanks(ctxt, buf, nbchar, 0)) {
4650 if (ctxt->sax->ignorableWhitespace != NULL)
4651 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4652 } else {
4653 if (ctxt->sax->characters != NULL)
4654 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4655 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4656 (*ctxt->space == -1))
4657 *ctxt->space = -2;
4658 }
4659 }
4660 }
4661 if ((cur != 0) && (!IS_CHAR(cur))) {
4662 /* Generate the error and skip the offending character */
4663 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4664 "PCDATA invalid Char value %d\n",
4665 cur);
4666 NEXTL(l);
4667 }
4668 }
4669
4670 /**
4671 * xmlParseExternalID:
4672 * @ctxt: an XML parser context
4673 * @publicID: a xmlChar** receiving PubidLiteral
4674 * @strict: indicate whether we should restrict parsing to only
4675 * production [75], see NOTE below
4676 *
4677 * Parse an External ID or a Public ID
4678 *
4679 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4680 * 'PUBLIC' S PubidLiteral S SystemLiteral
4681 *
4682 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4683 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4684 *
4685 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4686 *
4687 * Returns the function returns SystemLiteral and in the second
4688 * case publicID receives PubidLiteral, is strict is off
4689 * it is possible to return NULL and have publicID set.
4690 */
4691
4692 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4693 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4694 xmlChar *URI = NULL;
4695
4696 SHRINK;
4697
4698 *publicID = NULL;
4699 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4700 SKIP(6);
4701 if (SKIP_BLANKS == 0) {
4702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'SYSTEM'\n");
4704 }
4705 URI = xmlParseSystemLiteral(ctxt);
4706 if (URI == NULL) {
4707 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708 }
4709 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4710 SKIP(6);
4711 if (SKIP_BLANKS == 0) {
4712 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4713 "Space required after 'PUBLIC'\n");
4714 }
4715 *publicID = xmlParsePubidLiteral(ctxt);
4716 if (*publicID == NULL) {
4717 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4718 }
4719 if (strict) {
4720 /*
4721 * We don't handle [83] so "S SystemLiteral" is required.
4722 */
4723 if (SKIP_BLANKS == 0) {
4724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4725 "Space required after the Public Identifier\n");
4726 }
4727 } else {
4728 /*
4729 * We handle [83] so we return immediately, if
4730 * "S SystemLiteral" is not detected. We skip blanks if no
4731 * system literal was found, but this is harmless since we must
4732 * be at the end of a NotationDecl.
4733 */
4734 if (SKIP_BLANKS == 0) return(NULL);
4735 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4736 }
4737 URI = xmlParseSystemLiteral(ctxt);
4738 if (URI == NULL) {
4739 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4740 }
4741 }
4742 return(URI);
4743 }
4744
4745 /**
4746 * xmlParseCommentComplex:
4747 * @ctxt: an XML parser context
4748 * @buf: the already parsed part of the buffer
4749 * @len: number of bytes in the buffer
4750 * @size: allocated size of the buffer
4751 *
4752 * Skip an XML (SGML) comment <!-- .... -->
4753 * The spec says that "For compatibility, the string "--" (double-hyphen)
4754 * must not occur within comments. "
4755 * This is the slow routine in case the accelerator for ascii didn't work
4756 *
4757 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4758 */
4759 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4760 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4761 size_t len, size_t size) {
4762 int q, ql;
4763 int r, rl;
4764 int cur, l;
4765 size_t count = 0;
4766 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4767 XML_MAX_HUGE_LENGTH :
4768 XML_MAX_TEXT_LENGTH;
4769 int inputid;
4770
4771 inputid = ctxt->input->id;
4772
4773 if (buf == NULL) {
4774 len = 0;
4775 size = XML_PARSER_BUFFER_SIZE;
4776 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4777 if (buf == NULL) {
4778 xmlErrMemory(ctxt, NULL);
4779 return;
4780 }
4781 }
4782 GROW; /* Assure there's enough input data */
4783 q = CUR_CHAR(ql);
4784 if (q == 0)
4785 goto not_terminated;
4786 if (!IS_CHAR(q)) {
4787 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788 "xmlParseComment: invalid xmlChar value %d\n",
4789 q);
4790 xmlFree (buf);
4791 return;
4792 }
4793 NEXTL(ql);
4794 r = CUR_CHAR(rl);
4795 if (r == 0)
4796 goto not_terminated;
4797 if (!IS_CHAR(r)) {
4798 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4799 "xmlParseComment: invalid xmlChar value %d\n",
4800 q);
4801 xmlFree (buf);
4802 return;
4803 }
4804 NEXTL(rl);
4805 cur = CUR_CHAR(l);
4806 if (cur == 0)
4807 goto not_terminated;
4808 while (IS_CHAR(cur) && /* checked */
4809 ((cur != '>') ||
4810 (r != '-') || (q != '-'))) {
4811 if ((r == '-') && (q == '-')) {
4812 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4813 }
4814 if (len + 5 >= size) {
4815 xmlChar *new_buf;
4816 size_t new_size;
4817
4818 new_size = size * 2;
4819 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4820 if (new_buf == NULL) {
4821 xmlFree (buf);
4822 xmlErrMemory(ctxt, NULL);
4823 return;
4824 }
4825 buf = new_buf;
4826 size = new_size;
4827 }
4828 COPY_BUF(ql,buf,len,q);
4829 q = r;
4830 ql = rl;
4831 r = cur;
4832 rl = l;
4833
4834 count++;
4835 if (count > 50) {
4836 SHRINK;
4837 GROW;
4838 count = 0;
4839 if (ctxt->instate == XML_PARSER_EOF) {
4840 xmlFree(buf);
4841 return;
4842 }
4843 }
4844 NEXTL(l);
4845 cur = CUR_CHAR(l);
4846 if (cur == 0) {
4847 SHRINK;
4848 GROW;
4849 cur = CUR_CHAR(l);
4850 }
4851
4852 if (len > maxLength) {
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment too big found", NULL);
4855 xmlFree (buf);
4856 return;
4857 }
4858 }
4859 buf[len] = 0;
4860 if (cur == 0) {
4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 "Comment not terminated \n<!--%.50s\n", buf);
4863 } else if (!IS_CHAR(cur)) {
4864 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4865 "xmlParseComment: invalid xmlChar value %d\n",
4866 cur);
4867 } else {
4868 if (inputid != ctxt->input->id) {
4869 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4870 "Comment doesn't start and stop in the same"
4871 " entity\n");
4872 }
4873 NEXT;
4874 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4875 (!ctxt->disableSAX))
4876 ctxt->sax->comment(ctxt->userData, buf);
4877 }
4878 xmlFree(buf);
4879 return;
4880 not_terminated:
4881 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4882 "Comment not terminated\n", NULL);
4883 xmlFree(buf);
4884 return;
4885 }
4886
4887 /**
4888 * xmlParseComment:
4889 * @ctxt: an XML parser context
4890 *
4891 * Skip an XML (SGML) comment <!-- .... -->
4892 * The spec says that "For compatibility, the string "--" (double-hyphen)
4893 * must not occur within comments. "
4894 *
4895 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4896 */
4897 void
xmlParseComment(xmlParserCtxtPtr ctxt)4898 xmlParseComment(xmlParserCtxtPtr ctxt) {
4899 xmlChar *buf = NULL;
4900 size_t size = XML_PARSER_BUFFER_SIZE;
4901 size_t len = 0;
4902 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4903 XML_MAX_HUGE_LENGTH :
4904 XML_MAX_TEXT_LENGTH;
4905 xmlParserInputState state;
4906 const xmlChar *in;
4907 size_t nbchar = 0;
4908 int ccol;
4909 int inputid;
4910
4911 /*
4912 * Check that there is a comment right here.
4913 */
4914 if ((RAW != '<') || (NXT(1) != '!') ||
4915 (NXT(2) != '-') || (NXT(3) != '-')) return;
4916 state = ctxt->instate;
4917 ctxt->instate = XML_PARSER_COMMENT;
4918 inputid = ctxt->input->id;
4919 SKIP(4);
4920 SHRINK;
4921 GROW;
4922
4923 /*
4924 * Accelerated common case where input don't need to be
4925 * modified before passing it to the handler.
4926 */
4927 in = ctxt->input->cur;
4928 do {
4929 if (*in == 0xA) {
4930 do {
4931 ctxt->input->line++; ctxt->input->col = 1;
4932 in++;
4933 } while (*in == 0xA);
4934 }
4935 get_more:
4936 ccol = ctxt->input->col;
4937 while (((*in > '-') && (*in <= 0x7F)) ||
4938 ((*in >= 0x20) && (*in < '-')) ||
4939 (*in == 0x09)) {
4940 in++;
4941 ccol++;
4942 }
4943 ctxt->input->col = ccol;
4944 if (*in == 0xA) {
4945 do {
4946 ctxt->input->line++; ctxt->input->col = 1;
4947 in++;
4948 } while (*in == 0xA);
4949 goto get_more;
4950 }
4951 nbchar = in - ctxt->input->cur;
4952 /*
4953 * save current set of data
4954 */
4955 if (nbchar > 0) {
4956 if ((ctxt->sax != NULL) &&
4957 (ctxt->sax->comment != NULL)) {
4958 if (buf == NULL) {
4959 if ((*in == '-') && (in[1] == '-'))
4960 size = nbchar + 1;
4961 else
4962 size = XML_PARSER_BUFFER_SIZE + nbchar;
4963 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4964 if (buf == NULL) {
4965 xmlErrMemory(ctxt, NULL);
4966 ctxt->instate = state;
4967 return;
4968 }
4969 len = 0;
4970 } else if (len + nbchar + 1 >= size) {
4971 xmlChar *new_buf;
4972 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4973 new_buf = (xmlChar *) xmlRealloc(buf,
4974 size * sizeof(xmlChar));
4975 if (new_buf == NULL) {
4976 xmlFree (buf);
4977 xmlErrMemory(ctxt, NULL);
4978 ctxt->instate = state;
4979 return;
4980 }
4981 buf = new_buf;
4982 }
4983 memcpy(&buf[len], ctxt->input->cur, nbchar);
4984 len += nbchar;
4985 buf[len] = 0;
4986 }
4987 }
4988 if (len > maxLength) {
4989 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4990 "Comment too big found", NULL);
4991 xmlFree (buf);
4992 return;
4993 }
4994 ctxt->input->cur = in;
4995 if (*in == 0xA) {
4996 in++;
4997 ctxt->input->line++; ctxt->input->col = 1;
4998 }
4999 if (*in == 0xD) {
5000 in++;
5001 if (*in == 0xA) {
5002 ctxt->input->cur = in;
5003 in++;
5004 ctxt->input->line++; ctxt->input->col = 1;
5005 goto get_more;
5006 }
5007 in--;
5008 }
5009 SHRINK;
5010 GROW;
5011 if (ctxt->instate == XML_PARSER_EOF) {
5012 xmlFree(buf);
5013 return;
5014 }
5015 in = ctxt->input->cur;
5016 if (*in == '-') {
5017 if (in[1] == '-') {
5018 if (in[2] == '>') {
5019 if (ctxt->input->id != inputid) {
5020 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5021 "comment doesn't start and stop in the"
5022 " same entity\n");
5023 }
5024 SKIP(3);
5025 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5026 (!ctxt->disableSAX)) {
5027 if (buf != NULL)
5028 ctxt->sax->comment(ctxt->userData, buf);
5029 else
5030 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5031 }
5032 if (buf != NULL)
5033 xmlFree(buf);
5034 if (ctxt->instate != XML_PARSER_EOF)
5035 ctxt->instate = state;
5036 return;
5037 }
5038 if (buf != NULL) {
5039 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5040 "Double hyphen within comment: "
5041 "<!--%.50s\n",
5042 buf);
5043 } else
5044 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5045 "Double hyphen within comment\n", NULL);
5046 if (ctxt->instate == XML_PARSER_EOF) {
5047 xmlFree(buf);
5048 return;
5049 }
5050 in++;
5051 ctxt->input->col++;
5052 }
5053 in++;
5054 ctxt->input->col++;
5055 goto get_more;
5056 }
5057 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5058 xmlParseCommentComplex(ctxt, buf, len, size);
5059 ctxt->instate = state;
5060 return;
5061 }
5062
5063
5064 /**
5065 * xmlParsePITarget:
5066 * @ctxt: an XML parser context
5067 *
5068 * parse the name of a PI
5069 *
5070 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5071 *
5072 * Returns the PITarget name or NULL
5073 */
5074
5075 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5076 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5077 const xmlChar *name;
5078
5079 name = xmlParseName(ctxt);
5080 if ((name != NULL) &&
5081 ((name[0] == 'x') || (name[0] == 'X')) &&
5082 ((name[1] == 'm') || (name[1] == 'M')) &&
5083 ((name[2] == 'l') || (name[2] == 'L'))) {
5084 int i;
5085 if ((name[0] == 'x') && (name[1] == 'm') &&
5086 (name[2] == 'l') && (name[3] == 0)) {
5087 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5088 "XML declaration allowed only at the start of the document\n");
5089 return(name);
5090 } else if (name[3] == 0) {
5091 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5092 return(name);
5093 }
5094 for (i = 0;;i++) {
5095 if (xmlW3CPIs[i] == NULL) break;
5096 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5097 return(name);
5098 }
5099 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100 "xmlParsePITarget: invalid name prefix 'xml'\n",
5101 NULL, NULL);
5102 }
5103 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5104 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5105 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5106 }
5107 return(name);
5108 }
5109
5110 #ifdef LIBXML_CATALOG_ENABLED
5111 /**
5112 * xmlParseCatalogPI:
5113 * @ctxt: an XML parser context
5114 * @catalog: the PI value string
5115 *
5116 * parse an XML Catalog Processing Instruction.
5117 *
5118 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5119 *
5120 * Occurs only if allowed by the user and if happening in the Misc
5121 * part of the document before any doctype information
5122 * This will add the given catalog to the parsing context in order
5123 * to be used if there is a resolution need further down in the document
5124 */
5125
5126 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5127 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5128 xmlChar *URL = NULL;
5129 const xmlChar *tmp, *base;
5130 xmlChar marker;
5131
5132 tmp = catalog;
5133 while (IS_BLANK_CH(*tmp)) tmp++;
5134 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5135 goto error;
5136 tmp += 7;
5137 while (IS_BLANK_CH(*tmp)) tmp++;
5138 if (*tmp != '=') {
5139 return;
5140 }
5141 tmp++;
5142 while (IS_BLANK_CH(*tmp)) tmp++;
5143 marker = *tmp;
5144 if ((marker != '\'') && (marker != '"'))
5145 goto error;
5146 tmp++;
5147 base = tmp;
5148 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5149 if (*tmp == 0)
5150 goto error;
5151 URL = xmlStrndup(base, tmp - base);
5152 tmp++;
5153 while (IS_BLANK_CH(*tmp)) tmp++;
5154 if (*tmp != 0)
5155 goto error;
5156
5157 if (URL != NULL) {
5158 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5159 xmlFree(URL);
5160 }
5161 return;
5162
5163 error:
5164 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5165 "Catalog PI syntax error: %s\n",
5166 catalog, NULL);
5167 if (URL != NULL)
5168 xmlFree(URL);
5169 }
5170 #endif
5171
5172 /**
5173 * xmlParsePI:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse an XML Processing Instruction.
5177 *
5178 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5179 *
5180 * The processing is transferred to SAX once parsed.
5181 */
5182
5183 void
xmlParsePI(xmlParserCtxtPtr ctxt)5184 xmlParsePI(xmlParserCtxtPtr ctxt) {
5185 xmlChar *buf = NULL;
5186 size_t len = 0;
5187 size_t size = XML_PARSER_BUFFER_SIZE;
5188 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5189 XML_MAX_HUGE_LENGTH :
5190 XML_MAX_TEXT_LENGTH;
5191 int cur, l;
5192 const xmlChar *target;
5193 xmlParserInputState state;
5194 int count = 0;
5195
5196 if ((RAW == '<') && (NXT(1) == '?')) {
5197 int inputid = ctxt->input->id;
5198 state = ctxt->instate;
5199 ctxt->instate = XML_PARSER_PI;
5200 /*
5201 * this is a Processing Instruction.
5202 */
5203 SKIP(2);
5204 SHRINK;
5205
5206 /*
5207 * Parse the target name and check for special support like
5208 * namespace.
5209 */
5210 target = xmlParsePITarget(ctxt);
5211 if (target != NULL) {
5212 if ((RAW == '?') && (NXT(1) == '>')) {
5213 if (inputid != ctxt->input->id) {
5214 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5215 "PI declaration doesn't start and stop in"
5216 " the same entity\n");
5217 }
5218 SKIP(2);
5219
5220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, NULL);
5227 if (ctxt->instate != XML_PARSER_EOF)
5228 ctxt->instate = state;
5229 return;
5230 }
5231 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5232 if (buf == NULL) {
5233 xmlErrMemory(ctxt, NULL);
5234 ctxt->instate = state;
5235 return;
5236 }
5237 if (SKIP_BLANKS == 0) {
5238 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239 "ParsePI: PI %s space expected\n", target);
5240 }
5241 cur = CUR_CHAR(l);
5242 while (IS_CHAR(cur) && /* checked */
5243 ((cur != '?') || (NXT(1) != '>'))) {
5244 if (len + 5 >= size) {
5245 xmlChar *tmp;
5246 size_t new_size = size * 2;
5247 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5248 if (tmp == NULL) {
5249 xmlErrMemory(ctxt, NULL);
5250 xmlFree(buf);
5251 ctxt->instate = state;
5252 return;
5253 }
5254 buf = tmp;
5255 size = new_size;
5256 }
5257 count++;
5258 if (count > 50) {
5259 SHRINK;
5260 GROW;
5261 if (ctxt->instate == XML_PARSER_EOF) {
5262 xmlFree(buf);
5263 return;
5264 }
5265 count = 0;
5266 }
5267 COPY_BUF(l,buf,len,cur);
5268 NEXTL(l);
5269 cur = CUR_CHAR(l);
5270 if (cur == 0) {
5271 SHRINK;
5272 GROW;
5273 cur = CUR_CHAR(l);
5274 }
5275 if (len > maxLength) {
5276 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277 "PI %s too big found", target);
5278 xmlFree(buf);
5279 ctxt->instate = state;
5280 return;
5281 }
5282 }
5283 buf[len] = 0;
5284 if (cur != '?') {
5285 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5286 "ParsePI: PI %s never end ...\n", target);
5287 } else {
5288 if (inputid != ctxt->input->id) {
5289 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5290 "PI declaration doesn't start and stop in"
5291 " the same entity\n");
5292 }
5293 SKIP(2);
5294
5295 #ifdef LIBXML_CATALOG_ENABLED
5296 if (((state == XML_PARSER_MISC) ||
5297 (state == XML_PARSER_START)) &&
5298 (xmlStrEqual(target, XML_CATALOG_PI))) {
5299 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5300 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5301 (allow == XML_CATA_ALLOW_ALL))
5302 xmlParseCatalogPI(ctxt, buf);
5303 }
5304 #endif
5305
5306
5307 /*
5308 * SAX: PI detected.
5309 */
5310 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5311 (ctxt->sax->processingInstruction != NULL))
5312 ctxt->sax->processingInstruction(ctxt->userData,
5313 target, buf);
5314 }
5315 xmlFree(buf);
5316 } else {
5317 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5318 }
5319 if (ctxt->instate != XML_PARSER_EOF)
5320 ctxt->instate = state;
5321 }
5322 }
5323
5324 /**
5325 * xmlParseNotationDecl:
5326 * @ctxt: an XML parser context
5327 *
5328 * parse a notation declaration
5329 *
5330 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5331 *
5332 * Hence there is actually 3 choices:
5333 * 'PUBLIC' S PubidLiteral
5334 * 'PUBLIC' S PubidLiteral S SystemLiteral
5335 * and 'SYSTEM' S SystemLiteral
5336 *
5337 * See the NOTE on xmlParseExternalID().
5338 */
5339
5340 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5341 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5342 const xmlChar *name;
5343 xmlChar *Pubid;
5344 xmlChar *Systemid;
5345
5346 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5347 int inputid = ctxt->input->id;
5348 SHRINK;
5349 SKIP(10);
5350 if (SKIP_BLANKS == 0) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!NOTATION'\n");
5353 return;
5354 }
5355
5356 name = xmlParseName(ctxt);
5357 if (name == NULL) {
5358 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5359 return;
5360 }
5361 if (xmlStrchr(name, ':') != NULL) {
5362 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5363 "colons are forbidden from notation names '%s'\n",
5364 name, NULL, NULL);
5365 }
5366 if (SKIP_BLANKS == 0) {
5367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5368 "Space required after the NOTATION name'\n");
5369 return;
5370 }
5371
5372 /*
5373 * Parse the IDs.
5374 */
5375 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5376 SKIP_BLANKS;
5377
5378 if (RAW == '>') {
5379 if (inputid != ctxt->input->id) {
5380 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5381 "Notation declaration doesn't start and stop"
5382 " in the same entity\n");
5383 }
5384 NEXT;
5385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5386 (ctxt->sax->notationDecl != NULL))
5387 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5388 } else {
5389 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5390 }
5391 if (Systemid != NULL) xmlFree(Systemid);
5392 if (Pubid != NULL) xmlFree(Pubid);
5393 }
5394 }
5395
5396 /**
5397 * xmlParseEntityDecl:
5398 * @ctxt: an XML parser context
5399 *
5400 * parse <!ENTITY declarations
5401 *
5402 * [70] EntityDecl ::= GEDecl | PEDecl
5403 *
5404 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5405 *
5406 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5407 *
5408 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5409 *
5410 * [74] PEDef ::= EntityValue | ExternalID
5411 *
5412 * [76] NDataDecl ::= S 'NDATA' S Name
5413 *
5414 * [ VC: Notation Declared ]
5415 * The Name must match the declared name of a notation.
5416 */
5417
5418 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5419 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5420 const xmlChar *name = NULL;
5421 xmlChar *value = NULL;
5422 xmlChar *URI = NULL, *literal = NULL;
5423 const xmlChar *ndata = NULL;
5424 int isParameter = 0;
5425 xmlChar *orig = NULL;
5426
5427 /* GROW; done in the caller */
5428 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5429 int inputid = ctxt->input->id;
5430 SHRINK;
5431 SKIP(8);
5432 if (SKIP_BLANKS == 0) {
5433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5434 "Space required after '<!ENTITY'\n");
5435 }
5436
5437 if (RAW == '%') {
5438 NEXT;
5439 if (SKIP_BLANKS == 0) {
5440 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441 "Space required after '%%'\n");
5442 }
5443 isParameter = 1;
5444 }
5445
5446 name = xmlParseName(ctxt);
5447 if (name == NULL) {
5448 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5449 "xmlParseEntityDecl: no name\n");
5450 return;
5451 }
5452 if (xmlStrchr(name, ':') != NULL) {
5453 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5454 "colons are forbidden from entities names '%s'\n",
5455 name, NULL, NULL);
5456 }
5457 if (SKIP_BLANKS == 0) {
5458 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5459 "Space required after the entity name\n");
5460 }
5461
5462 ctxt->instate = XML_PARSER_ENTITY_DECL;
5463 /*
5464 * handle the various case of definitions...
5465 */
5466 if (isParameter) {
5467 if ((RAW == '"') || (RAW == '\'')) {
5468 value = xmlParseEntityValue(ctxt, &orig);
5469 if (value) {
5470 if ((ctxt->sax != NULL) &&
5471 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472 ctxt->sax->entityDecl(ctxt->userData, name,
5473 XML_INTERNAL_PARAMETER_ENTITY,
5474 NULL, NULL, value);
5475 }
5476 } else {
5477 URI = xmlParseExternalID(ctxt, &literal, 1);
5478 if ((URI == NULL) && (literal == NULL)) {
5479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480 }
5481 if (URI) {
5482 xmlURIPtr uri;
5483
5484 uri = xmlParseURI((const char *) URI);
5485 if (uri == NULL) {
5486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5487 "Invalid URI: %s\n", URI);
5488 /*
5489 * This really ought to be a well formedness error
5490 * but the XML Core WG decided otherwise c.f. issue
5491 * E26 of the XML erratas.
5492 */
5493 } else {
5494 if (uri->fragment != NULL) {
5495 /*
5496 * Okay this is foolish to block those but not
5497 * invalid URIs.
5498 */
5499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5500 } else {
5501 if ((ctxt->sax != NULL) &&
5502 (!ctxt->disableSAX) &&
5503 (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_PARAMETER_ENTITY,
5506 literal, URI, NULL);
5507 }
5508 xmlFreeURI(uri);
5509 }
5510 }
5511 }
5512 } else {
5513 if ((RAW == '"') || (RAW == '\'')) {
5514 value = xmlParseEntityValue(ctxt, &orig);
5515 if ((ctxt->sax != NULL) &&
5516 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5517 ctxt->sax->entityDecl(ctxt->userData, name,
5518 XML_INTERNAL_GENERAL_ENTITY,
5519 NULL, NULL, value);
5520 /*
5521 * For expat compatibility in SAX mode.
5522 */
5523 if ((ctxt->myDoc == NULL) ||
5524 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5525 if (ctxt->myDoc == NULL) {
5526 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5527 if (ctxt->myDoc == NULL) {
5528 xmlErrMemory(ctxt, "New Doc failed");
5529 return;
5530 }
5531 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5532 }
5533 if (ctxt->myDoc->intSubset == NULL)
5534 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5535 BAD_CAST "fake", NULL, NULL);
5536
5537 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5538 NULL, NULL, value);
5539 }
5540 } else {
5541 URI = xmlParseExternalID(ctxt, &literal, 1);
5542 if ((URI == NULL) && (literal == NULL)) {
5543 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5544 }
5545 if (URI) {
5546 xmlURIPtr uri;
5547
5548 uri = xmlParseURI((const char *)URI);
5549 if (uri == NULL) {
5550 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 "Invalid URI: %s\n", URI);
5552 /*
5553 * This really ought to be a well formedness error
5554 * but the XML Core WG decided otherwise c.f. issue
5555 * E26 of the XML erratas.
5556 */
5557 } else {
5558 if (uri->fragment != NULL) {
5559 /*
5560 * Okay this is foolish to block those but not
5561 * invalid URIs.
5562 */
5563 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5564 }
5565 xmlFreeURI(uri);
5566 }
5567 }
5568 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5569 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5570 "Space required before 'NDATA'\n");
5571 }
5572 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5573 SKIP(5);
5574 if (SKIP_BLANKS == 0) {
5575 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5576 "Space required after 'NDATA'\n");
5577 }
5578 ndata = xmlParseName(ctxt);
5579 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5580 (ctxt->sax->unparsedEntityDecl != NULL))
5581 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5582 literal, URI, ndata);
5583 } else {
5584 if ((ctxt->sax != NULL) &&
5585 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5586 ctxt->sax->entityDecl(ctxt->userData, name,
5587 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5588 literal, URI, NULL);
5589 /*
5590 * For expat compatibility in SAX mode.
5591 * assuming the entity replacement was asked for
5592 */
5593 if ((ctxt->replaceEntities != 0) &&
5594 ((ctxt->myDoc == NULL) ||
5595 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5596 if (ctxt->myDoc == NULL) {
5597 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5598 if (ctxt->myDoc == NULL) {
5599 xmlErrMemory(ctxt, "New Doc failed");
5600 return;
5601 }
5602 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5603 }
5604
5605 if (ctxt->myDoc->intSubset == NULL)
5606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5607 BAD_CAST "fake", NULL, NULL);
5608 xmlSAX2EntityDecl(ctxt, name,
5609 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5610 literal, URI, NULL);
5611 }
5612 }
5613 }
5614 }
5615 if (ctxt->instate == XML_PARSER_EOF)
5616 goto done;
5617 SKIP_BLANKS;
5618 if (RAW != '>') {
5619 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5620 "xmlParseEntityDecl: entity %s not terminated\n", name);
5621 xmlHaltParser(ctxt);
5622 } else {
5623 if (inputid != ctxt->input->id) {
5624 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5625 "Entity declaration doesn't start and stop in"
5626 " the same entity\n");
5627 }
5628 NEXT;
5629 }
5630 if (orig != NULL) {
5631 /*
5632 * Ugly mechanism to save the raw entity value.
5633 */
5634 xmlEntityPtr cur = NULL;
5635
5636 if (isParameter) {
5637 if ((ctxt->sax != NULL) &&
5638 (ctxt->sax->getParameterEntity != NULL))
5639 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5640 } else {
5641 if ((ctxt->sax != NULL) &&
5642 (ctxt->sax->getEntity != NULL))
5643 cur = ctxt->sax->getEntity(ctxt->userData, name);
5644 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5645 cur = xmlSAX2GetEntity(ctxt, name);
5646 }
5647 }
5648 if ((cur != NULL) && (cur->orig == NULL)) {
5649 cur->orig = orig;
5650 orig = NULL;
5651 }
5652 }
5653
5654 done:
5655 if (value != NULL) xmlFree(value);
5656 if (URI != NULL) xmlFree(URI);
5657 if (literal != NULL) xmlFree(literal);
5658 if (orig != NULL) xmlFree(orig);
5659 }
5660 }
5661
5662 /**
5663 * xmlParseDefaultDecl:
5664 * @ctxt: an XML parser context
5665 * @value: Receive a possible fixed default value for the attribute
5666 *
5667 * Parse an attribute default declaration
5668 *
5669 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5670 *
5671 * [ VC: Required Attribute ]
5672 * if the default declaration is the keyword #REQUIRED, then the
5673 * attribute must be specified for all elements of the type in the
5674 * attribute-list declaration.
5675 *
5676 * [ VC: Attribute Default Legal ]
5677 * The declared default value must meet the lexical constraints of
5678 * the declared attribute type c.f. xmlValidateAttributeDecl()
5679 *
5680 * [ VC: Fixed Attribute Default ]
5681 * if an attribute has a default value declared with the #FIXED
5682 * keyword, instances of that attribute must match the default value.
5683 *
5684 * [ WFC: No < in Attribute Values ]
5685 * handled in xmlParseAttValue()
5686 *
5687 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5688 * or XML_ATTRIBUTE_FIXED.
5689 */
5690
5691 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5692 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5693 int val;
5694 xmlChar *ret;
5695
5696 *value = NULL;
5697 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5698 SKIP(9);
5699 return(XML_ATTRIBUTE_REQUIRED);
5700 }
5701 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5702 SKIP(8);
5703 return(XML_ATTRIBUTE_IMPLIED);
5704 }
5705 val = XML_ATTRIBUTE_NONE;
5706 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5707 SKIP(6);
5708 val = XML_ATTRIBUTE_FIXED;
5709 if (SKIP_BLANKS == 0) {
5710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5711 "Space required after '#FIXED'\n");
5712 }
5713 }
5714 ret = xmlParseAttValue(ctxt);
5715 ctxt->instate = XML_PARSER_DTD;
5716 if (ret == NULL) {
5717 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5718 "Attribute default value declaration error\n");
5719 } else
5720 *value = ret;
5721 return(val);
5722 }
5723
5724 /**
5725 * xmlParseNotationType:
5726 * @ctxt: an XML parser context
5727 *
5728 * parse an Notation attribute type.
5729 *
5730 * Note: the leading 'NOTATION' S part has already being parsed...
5731 *
5732 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5733 *
5734 * [ VC: Notation Attributes ]
5735 * Values of this type must match one of the notation names included
5736 * in the declaration; all notation names in the declaration must be declared.
5737 *
5738 * Returns: the notation attribute tree built while parsing
5739 */
5740
5741 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5742 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5743 const xmlChar *name;
5744 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5745
5746 if (RAW != '(') {
5747 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5748 return(NULL);
5749 }
5750 SHRINK;
5751 do {
5752 NEXT;
5753 SKIP_BLANKS;
5754 name = xmlParseName(ctxt);
5755 if (name == NULL) {
5756 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757 "Name expected in NOTATION declaration\n");
5758 xmlFreeEnumeration(ret);
5759 return(NULL);
5760 }
5761 tmp = ret;
5762 while (tmp != NULL) {
5763 if (xmlStrEqual(name, tmp->name)) {
5764 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5765 "standalone: attribute notation value token %s duplicated\n",
5766 name, NULL);
5767 if (!xmlDictOwns(ctxt->dict, name))
5768 xmlFree((xmlChar *) name);
5769 break;
5770 }
5771 tmp = tmp->next;
5772 }
5773 if (tmp == NULL) {
5774 cur = xmlCreateEnumeration(name);
5775 if (cur == NULL) {
5776 xmlFreeEnumeration(ret);
5777 return(NULL);
5778 }
5779 if (last == NULL) ret = last = cur;
5780 else {
5781 last->next = cur;
5782 last = cur;
5783 }
5784 }
5785 SKIP_BLANKS;
5786 } while (RAW == '|');
5787 if (RAW != ')') {
5788 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5789 xmlFreeEnumeration(ret);
5790 return(NULL);
5791 }
5792 NEXT;
5793 return(ret);
5794 }
5795
5796 /**
5797 * xmlParseEnumerationType:
5798 * @ctxt: an XML parser context
5799 *
5800 * parse an Enumeration attribute type.
5801 *
5802 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803 *
5804 * [ VC: Enumeration ]
5805 * Values of this type must match one of the Nmtoken tokens in
5806 * the declaration
5807 *
5808 * Returns: the enumeration attribute tree built while parsing
5809 */
5810
5811 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5812 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5813 xmlChar *name;
5814 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5815
5816 if (RAW != '(') {
5817 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5818 return(NULL);
5819 }
5820 SHRINK;
5821 do {
5822 NEXT;
5823 SKIP_BLANKS;
5824 name = xmlParseNmtoken(ctxt);
5825 if (name == NULL) {
5826 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5827 return(ret);
5828 }
5829 tmp = ret;
5830 while (tmp != NULL) {
5831 if (xmlStrEqual(name, tmp->name)) {
5832 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5833 "standalone: attribute enumeration value token %s duplicated\n",
5834 name, NULL);
5835 if (!xmlDictOwns(ctxt->dict, name))
5836 xmlFree(name);
5837 break;
5838 }
5839 tmp = tmp->next;
5840 }
5841 if (tmp == NULL) {
5842 cur = xmlCreateEnumeration(name);
5843 if (!xmlDictOwns(ctxt->dict, name))
5844 xmlFree(name);
5845 if (cur == NULL) {
5846 xmlFreeEnumeration(ret);
5847 return(NULL);
5848 }
5849 if (last == NULL) ret = last = cur;
5850 else {
5851 last->next = cur;
5852 last = cur;
5853 }
5854 }
5855 SKIP_BLANKS;
5856 } while (RAW == '|');
5857 if (RAW != ')') {
5858 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5859 return(ret);
5860 }
5861 NEXT;
5862 return(ret);
5863 }
5864
5865 /**
5866 * xmlParseEnumeratedType:
5867 * @ctxt: an XML parser context
5868 * @tree: the enumeration tree built while parsing
5869 *
5870 * parse an Enumerated attribute type.
5871 *
5872 * [57] EnumeratedType ::= NotationType | Enumeration
5873 *
5874 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5875 *
5876 *
5877 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5878 */
5879
5880 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5881 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5882 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5883 SKIP(8);
5884 if (SKIP_BLANKS == 0) {
5885 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5886 "Space required after 'NOTATION'\n");
5887 return(0);
5888 }
5889 *tree = xmlParseNotationType(ctxt);
5890 if (*tree == NULL) return(0);
5891 return(XML_ATTRIBUTE_NOTATION);
5892 }
5893 *tree = xmlParseEnumerationType(ctxt);
5894 if (*tree == NULL) return(0);
5895 return(XML_ATTRIBUTE_ENUMERATION);
5896 }
5897
5898 /**
5899 * xmlParseAttributeType:
5900 * @ctxt: an XML parser context
5901 * @tree: the enumeration tree built while parsing
5902 *
5903 * parse the Attribute list def for an element
5904 *
5905 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5906 *
5907 * [55] StringType ::= 'CDATA'
5908 *
5909 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5910 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5911 *
5912 * Validity constraints for attribute values syntax are checked in
5913 * xmlValidateAttributeValue()
5914 *
5915 * [ VC: ID ]
5916 * Values of type ID must match the Name production. A name must not
5917 * appear more than once in an XML document as a value of this type;
5918 * i.e., ID values must uniquely identify the elements which bear them.
5919 *
5920 * [ VC: One ID per Element Type ]
5921 * No element type may have more than one ID attribute specified.
5922 *
5923 * [ VC: ID Attribute Default ]
5924 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5925 *
5926 * [ VC: IDREF ]
5927 * Values of type IDREF must match the Name production, and values
5928 * of type IDREFS must match Names; each IDREF Name must match the value
5929 * of an ID attribute on some element in the XML document; i.e. IDREF
5930 * values must match the value of some ID attribute.
5931 *
5932 * [ VC: Entity Name ]
5933 * Values of type ENTITY must match the Name production, values
5934 * of type ENTITIES must match Names; each Entity Name must match the
5935 * name of an unparsed entity declared in the DTD.
5936 *
5937 * [ VC: Name Token ]
5938 * Values of type NMTOKEN must match the Nmtoken production; values
5939 * of type NMTOKENS must match Nmtokens.
5940 *
5941 * Returns the attribute type
5942 */
5943 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5944 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5945 SHRINK;
5946 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5947 SKIP(5);
5948 return(XML_ATTRIBUTE_CDATA);
5949 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5950 SKIP(6);
5951 return(XML_ATTRIBUTE_IDREFS);
5952 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5953 SKIP(5);
5954 return(XML_ATTRIBUTE_IDREF);
5955 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5956 SKIP(2);
5957 return(XML_ATTRIBUTE_ID);
5958 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5959 SKIP(6);
5960 return(XML_ATTRIBUTE_ENTITY);
5961 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5962 SKIP(8);
5963 return(XML_ATTRIBUTE_ENTITIES);
5964 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5965 SKIP(8);
5966 return(XML_ATTRIBUTE_NMTOKENS);
5967 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5968 SKIP(7);
5969 return(XML_ATTRIBUTE_NMTOKEN);
5970 }
5971 return(xmlParseEnumeratedType(ctxt, tree));
5972 }
5973
5974 /**
5975 * xmlParseAttributeListDecl:
5976 * @ctxt: an XML parser context
5977 *
5978 * : parse the Attribute list def for an element
5979 *
5980 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5981 *
5982 * [53] AttDef ::= S Name S AttType S DefaultDecl
5983 *
5984 */
5985 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5986 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5987 const xmlChar *elemName;
5988 const xmlChar *attrName;
5989 xmlEnumerationPtr tree;
5990
5991 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5992 int inputid = ctxt->input->id;
5993
5994 SKIP(9);
5995 if (SKIP_BLANKS == 0) {
5996 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5997 "Space required after '<!ATTLIST'\n");
5998 }
5999 elemName = xmlParseName(ctxt);
6000 if (elemName == NULL) {
6001 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6002 "ATTLIST: no name for Element\n");
6003 return;
6004 }
6005 SKIP_BLANKS;
6006 GROW;
6007 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6008 int type;
6009 int def;
6010 xmlChar *defaultValue = NULL;
6011
6012 GROW;
6013 tree = NULL;
6014 attrName = xmlParseName(ctxt);
6015 if (attrName == NULL) {
6016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6017 "ATTLIST: no name for Attribute\n");
6018 break;
6019 }
6020 GROW;
6021 if (SKIP_BLANKS == 0) {
6022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 "Space required after the attribute name\n");
6024 break;
6025 }
6026
6027 type = xmlParseAttributeType(ctxt, &tree);
6028 if (type <= 0) {
6029 break;
6030 }
6031
6032 GROW;
6033 if (SKIP_BLANKS == 0) {
6034 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6035 "Space required after the attribute type\n");
6036 if (tree != NULL)
6037 xmlFreeEnumeration(tree);
6038 break;
6039 }
6040
6041 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6042 if (def <= 0) {
6043 if (defaultValue != NULL)
6044 xmlFree(defaultValue);
6045 if (tree != NULL)
6046 xmlFreeEnumeration(tree);
6047 break;
6048 }
6049 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6050 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6051
6052 GROW;
6053 if (RAW != '>') {
6054 if (SKIP_BLANKS == 0) {
6055 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6056 "Space required after the attribute default value\n");
6057 if (defaultValue != NULL)
6058 xmlFree(defaultValue);
6059 if (tree != NULL)
6060 xmlFreeEnumeration(tree);
6061 break;
6062 }
6063 }
6064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6065 (ctxt->sax->attributeDecl != NULL))
6066 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6067 type, def, defaultValue, tree);
6068 else if (tree != NULL)
6069 xmlFreeEnumeration(tree);
6070
6071 if ((ctxt->sax2) && (defaultValue != NULL) &&
6072 (def != XML_ATTRIBUTE_IMPLIED) &&
6073 (def != XML_ATTRIBUTE_REQUIRED)) {
6074 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6075 }
6076 if (ctxt->sax2) {
6077 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6078 }
6079 if (defaultValue != NULL)
6080 xmlFree(defaultValue);
6081 GROW;
6082 }
6083 if (RAW == '>') {
6084 if (inputid != ctxt->input->id) {
6085 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6086 "Attribute list declaration doesn't start and"
6087 " stop in the same entity\n");
6088 }
6089 NEXT;
6090 }
6091 }
6092 }
6093
6094 /**
6095 * xmlParseElementMixedContentDecl:
6096 * @ctxt: an XML parser context
6097 * @inputchk: the input used for the current entity, needed for boundary checks
6098 *
6099 * parse the declaration for a Mixed Element content
6100 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6101 *
6102 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6103 * '(' S? '#PCDATA' S? ')'
6104 *
6105 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6106 *
6107 * [ VC: No Duplicate Types ]
6108 * The same name must not appear more than once in a single
6109 * mixed-content declaration.
6110 *
6111 * returns: the list of the xmlElementContentPtr describing the element choices
6112 */
6113 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6114 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6115 xmlElementContentPtr ret = NULL, cur = NULL, n;
6116 const xmlChar *elem = NULL;
6117
6118 GROW;
6119 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6120 SKIP(7);
6121 SKIP_BLANKS;
6122 SHRINK;
6123 if (RAW == ')') {
6124 if (ctxt->input->id != inputchk) {
6125 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6126 "Element content declaration doesn't start and"
6127 " stop in the same entity\n");
6128 }
6129 NEXT;
6130 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131 if (ret == NULL)
6132 return(NULL);
6133 if (RAW == '*') {
6134 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6135 NEXT;
6136 }
6137 return(ret);
6138 }
6139 if ((RAW == '(') || (RAW == '|')) {
6140 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6141 if (ret == NULL) return(NULL);
6142 }
6143 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6144 NEXT;
6145 if (elem == NULL) {
6146 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147 if (ret == NULL) {
6148 xmlFreeDocElementContent(ctxt->myDoc, cur);
6149 return(NULL);
6150 }
6151 ret->c1 = cur;
6152 if (cur != NULL)
6153 cur->parent = ret;
6154 cur = ret;
6155 } else {
6156 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6157 if (n == NULL) {
6158 xmlFreeDocElementContent(ctxt->myDoc, ret);
6159 return(NULL);
6160 }
6161 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6162 if (n->c1 != NULL)
6163 n->c1->parent = n;
6164 cur->c2 = n;
6165 if (n != NULL)
6166 n->parent = cur;
6167 cur = n;
6168 }
6169 SKIP_BLANKS;
6170 elem = xmlParseName(ctxt);
6171 if (elem == NULL) {
6172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6173 "xmlParseElementMixedContentDecl : Name expected\n");
6174 xmlFreeDocElementContent(ctxt->myDoc, ret);
6175 return(NULL);
6176 }
6177 SKIP_BLANKS;
6178 GROW;
6179 }
6180 if ((RAW == ')') && (NXT(1) == '*')) {
6181 if (elem != NULL) {
6182 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6183 XML_ELEMENT_CONTENT_ELEMENT);
6184 if (cur->c2 != NULL)
6185 cur->c2->parent = cur;
6186 }
6187 if (ret != NULL)
6188 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6189 if (ctxt->input->id != inputchk) {
6190 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6191 "Element content declaration doesn't start and"
6192 " stop in the same entity\n");
6193 }
6194 SKIP(2);
6195 } else {
6196 xmlFreeDocElementContent(ctxt->myDoc, ret);
6197 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6198 return(NULL);
6199 }
6200
6201 } else {
6202 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6203 }
6204 return(ret);
6205 }
6206
6207 /**
6208 * xmlParseElementChildrenContentDeclPriv:
6209 * @ctxt: an XML parser context
6210 * @inputchk: the input used for the current entity, needed for boundary checks
6211 * @depth: the level of recursion
6212 *
6213 * parse the declaration for a Mixed Element content
6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6215 *
6216 *
6217 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6218 *
6219 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6220 *
6221 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6222 *
6223 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6224 *
6225 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6226 * TODO Parameter-entity replacement text must be properly nested
6227 * with parenthesized groups. That is to say, if either of the
6228 * opening or closing parentheses in a choice, seq, or Mixed
6229 * construct is contained in the replacement text for a parameter
6230 * entity, both must be contained in the same replacement text. For
6231 * interoperability, if a parameter-entity reference appears in a
6232 * choice, seq, or Mixed construct, its replacement text should not
6233 * be empty, and neither the first nor last non-blank character of
6234 * the replacement text should be a connector (| or ,).
6235 *
6236 * Returns the tree of xmlElementContentPtr describing the element
6237 * hierarchy.
6238 */
6239 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6240 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6241 int depth) {
6242 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6243 const xmlChar *elem;
6244 xmlChar type = 0;
6245
6246 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6247 (depth > 2048)) {
6248 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6249 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6250 depth);
6251 return(NULL);
6252 }
6253 SKIP_BLANKS;
6254 GROW;
6255 if (RAW == '(') {
6256 int inputid = ctxt->input->id;
6257
6258 /* Recurse on first child */
6259 NEXT;
6260 SKIP_BLANKS;
6261 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6262 depth + 1);
6263 if (cur == NULL)
6264 return(NULL);
6265 SKIP_BLANKS;
6266 GROW;
6267 } else {
6268 elem = xmlParseName(ctxt);
6269 if (elem == NULL) {
6270 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6271 return(NULL);
6272 }
6273 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6274 if (cur == NULL) {
6275 xmlErrMemory(ctxt, NULL);
6276 return(NULL);
6277 }
6278 GROW;
6279 if (RAW == '?') {
6280 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6281 NEXT;
6282 } else if (RAW == '*') {
6283 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6284 NEXT;
6285 } else if (RAW == '+') {
6286 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6287 NEXT;
6288 } else {
6289 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6290 }
6291 GROW;
6292 }
6293 SKIP_BLANKS;
6294 SHRINK;
6295 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6296 /*
6297 * Each loop we parse one separator and one element.
6298 */
6299 if (RAW == ',') {
6300 if (type == 0) type = CUR;
6301
6302 /*
6303 * Detect "Name | Name , Name" error
6304 */
6305 else if (type != CUR) {
6306 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6307 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6308 type);
6309 if ((last != NULL) && (last != ret))
6310 xmlFreeDocElementContent(ctxt->myDoc, last);
6311 if (ret != NULL)
6312 xmlFreeDocElementContent(ctxt->myDoc, ret);
6313 return(NULL);
6314 }
6315 NEXT;
6316
6317 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6318 if (op == NULL) {
6319 if ((last != NULL) && (last != ret))
6320 xmlFreeDocElementContent(ctxt->myDoc, last);
6321 xmlFreeDocElementContent(ctxt->myDoc, ret);
6322 return(NULL);
6323 }
6324 if (last == NULL) {
6325 op->c1 = ret;
6326 if (ret != NULL)
6327 ret->parent = op;
6328 ret = cur = op;
6329 } else {
6330 cur->c2 = op;
6331 if (op != NULL)
6332 op->parent = cur;
6333 op->c1 = last;
6334 if (last != NULL)
6335 last->parent = op;
6336 cur =op;
6337 last = NULL;
6338 }
6339 } else if (RAW == '|') {
6340 if (type == 0) type = CUR;
6341
6342 /*
6343 * Detect "Name , Name | Name" error
6344 */
6345 else if (type != CUR) {
6346 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6347 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6348 type);
6349 if ((last != NULL) && (last != ret))
6350 xmlFreeDocElementContent(ctxt->myDoc, last);
6351 if (ret != NULL)
6352 xmlFreeDocElementContent(ctxt->myDoc, ret);
6353 return(NULL);
6354 }
6355 NEXT;
6356
6357 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6358 if (op == NULL) {
6359 if ((last != NULL) && (last != ret))
6360 xmlFreeDocElementContent(ctxt->myDoc, last);
6361 if (ret != NULL)
6362 xmlFreeDocElementContent(ctxt->myDoc, ret);
6363 return(NULL);
6364 }
6365 if (last == NULL) {
6366 op->c1 = ret;
6367 if (ret != NULL)
6368 ret->parent = op;
6369 ret = cur = op;
6370 } else {
6371 cur->c2 = op;
6372 if (op != NULL)
6373 op->parent = cur;
6374 op->c1 = last;
6375 if (last != NULL)
6376 last->parent = op;
6377 cur =op;
6378 last = NULL;
6379 }
6380 } else {
6381 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6382 if ((last != NULL) && (last != ret))
6383 xmlFreeDocElementContent(ctxt->myDoc, last);
6384 if (ret != NULL)
6385 xmlFreeDocElementContent(ctxt->myDoc, ret);
6386 return(NULL);
6387 }
6388 GROW;
6389 SKIP_BLANKS;
6390 GROW;
6391 if (RAW == '(') {
6392 int inputid = ctxt->input->id;
6393 /* Recurse on second child */
6394 NEXT;
6395 SKIP_BLANKS;
6396 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6397 depth + 1);
6398 if (last == NULL) {
6399 if (ret != NULL)
6400 xmlFreeDocElementContent(ctxt->myDoc, ret);
6401 return(NULL);
6402 }
6403 SKIP_BLANKS;
6404 } else {
6405 elem = xmlParseName(ctxt);
6406 if (elem == NULL) {
6407 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6408 if (ret != NULL)
6409 xmlFreeDocElementContent(ctxt->myDoc, ret);
6410 return(NULL);
6411 }
6412 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6413 if (last == NULL) {
6414 if (ret != NULL)
6415 xmlFreeDocElementContent(ctxt->myDoc, ret);
6416 return(NULL);
6417 }
6418 if (RAW == '?') {
6419 last->ocur = XML_ELEMENT_CONTENT_OPT;
6420 NEXT;
6421 } else if (RAW == '*') {
6422 last->ocur = XML_ELEMENT_CONTENT_MULT;
6423 NEXT;
6424 } else if (RAW == '+') {
6425 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6426 NEXT;
6427 } else {
6428 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6429 }
6430 }
6431 SKIP_BLANKS;
6432 GROW;
6433 }
6434 if ((cur != NULL) && (last != NULL)) {
6435 cur->c2 = last;
6436 if (last != NULL)
6437 last->parent = cur;
6438 }
6439 if (ctxt->input->id != inputchk) {
6440 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6441 "Element content declaration doesn't start and stop in"
6442 " the same entity\n");
6443 }
6444 NEXT;
6445 if (RAW == '?') {
6446 if (ret != NULL) {
6447 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6448 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6449 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6450 else
6451 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6452 }
6453 NEXT;
6454 } else if (RAW == '*') {
6455 if (ret != NULL) {
6456 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6457 cur = ret;
6458 /*
6459 * Some normalization:
6460 * (a | b* | c?)* == (a | b | c)*
6461 */
6462 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6463 if ((cur->c1 != NULL) &&
6464 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6465 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6466 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6467 if ((cur->c2 != NULL) &&
6468 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6469 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6470 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6471 cur = cur->c2;
6472 }
6473 }
6474 NEXT;
6475 } else if (RAW == '+') {
6476 if (ret != NULL) {
6477 int found = 0;
6478
6479 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6480 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482 else
6483 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6484 /*
6485 * Some normalization:
6486 * (a | b*)+ == (a | b)*
6487 * (a | b?)+ == (a | b)*
6488 */
6489 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6490 if ((cur->c1 != NULL) &&
6491 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6492 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6493 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6494 found = 1;
6495 }
6496 if ((cur->c2 != NULL) &&
6497 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6499 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6500 found = 1;
6501 }
6502 cur = cur->c2;
6503 }
6504 if (found)
6505 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6506 }
6507 NEXT;
6508 }
6509 return(ret);
6510 }
6511
6512 /**
6513 * xmlParseElementChildrenContentDecl:
6514 * @ctxt: an XML parser context
6515 * @inputchk: the input used for the current entity, needed for boundary checks
6516 *
6517 * parse the declaration for a Mixed Element content
6518 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6519 *
6520 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6521 *
6522 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6523 *
6524 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6525 *
6526 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6527 *
6528 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6529 * TODO Parameter-entity replacement text must be properly nested
6530 * with parenthesized groups. That is to say, if either of the
6531 * opening or closing parentheses in a choice, seq, or Mixed
6532 * construct is contained in the replacement text for a parameter
6533 * entity, both must be contained in the same replacement text. For
6534 * interoperability, if a parameter-entity reference appears in a
6535 * choice, seq, or Mixed construct, its replacement text should not
6536 * be empty, and neither the first nor last non-blank character of
6537 * the replacement text should be a connector (| or ,).
6538 *
6539 * Returns the tree of xmlElementContentPtr describing the element
6540 * hierarchy.
6541 */
6542 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6543 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6544 /* stub left for API/ABI compat */
6545 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6546 }
6547
6548 /**
6549 * xmlParseElementContentDecl:
6550 * @ctxt: an XML parser context
6551 * @name: the name of the element being defined.
6552 * @result: the Element Content pointer will be stored here if any
6553 *
6554 * parse the declaration for an Element content either Mixed or Children,
6555 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6556 *
6557 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6558 *
6559 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6560 */
6561
6562 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6563 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6564 xmlElementContentPtr *result) {
6565
6566 xmlElementContentPtr tree = NULL;
6567 int inputid = ctxt->input->id;
6568 int res;
6569
6570 *result = NULL;
6571
6572 if (RAW != '(') {
6573 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6574 "xmlParseElementContentDecl : %s '(' expected\n", name);
6575 return(-1);
6576 }
6577 NEXT;
6578 GROW;
6579 if (ctxt->instate == XML_PARSER_EOF)
6580 return(-1);
6581 SKIP_BLANKS;
6582 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6583 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6584 res = XML_ELEMENT_TYPE_MIXED;
6585 } else {
6586 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6587 res = XML_ELEMENT_TYPE_ELEMENT;
6588 }
6589 SKIP_BLANKS;
6590 *result = tree;
6591 return(res);
6592 }
6593
6594 /**
6595 * xmlParseElementDecl:
6596 * @ctxt: an XML parser context
6597 *
6598 * parse an Element declaration.
6599 *
6600 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6601 *
6602 * [ VC: Unique Element Type Declaration ]
6603 * No element type may be declared more than once
6604 *
6605 * Returns the type of the element, or -1 in case of error
6606 */
6607 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6608 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6609 const xmlChar *name;
6610 int ret = -1;
6611 xmlElementContentPtr content = NULL;
6612
6613 /* GROW; done in the caller */
6614 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6615 int inputid = ctxt->input->id;
6616
6617 SKIP(9);
6618 if (SKIP_BLANKS == 0) {
6619 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6620 "Space required after 'ELEMENT'\n");
6621 return(-1);
6622 }
6623 name = xmlParseName(ctxt);
6624 if (name == NULL) {
6625 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6626 "xmlParseElementDecl: no name for Element\n");
6627 return(-1);
6628 }
6629 if (SKIP_BLANKS == 0) {
6630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6631 "Space required after the element name\n");
6632 }
6633 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6634 SKIP(5);
6635 /*
6636 * Element must always be empty.
6637 */
6638 ret = XML_ELEMENT_TYPE_EMPTY;
6639 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6640 (NXT(2) == 'Y')) {
6641 SKIP(3);
6642 /*
6643 * Element is a generic container.
6644 */
6645 ret = XML_ELEMENT_TYPE_ANY;
6646 } else if (RAW == '(') {
6647 ret = xmlParseElementContentDecl(ctxt, name, &content);
6648 } else {
6649 /*
6650 * [ WFC: PEs in Internal Subset ] error handling.
6651 */
6652 if ((RAW == '%') && (ctxt->external == 0) &&
6653 (ctxt->inputNr == 1)) {
6654 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6655 "PEReference: forbidden within markup decl in internal subset\n");
6656 } else {
6657 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6658 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6659 }
6660 return(-1);
6661 }
6662
6663 SKIP_BLANKS;
6664
6665 if (RAW != '>') {
6666 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6667 if (content != NULL) {
6668 xmlFreeDocElementContent(ctxt->myDoc, content);
6669 }
6670 } else {
6671 if (inputid != ctxt->input->id) {
6672 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6673 "Element declaration doesn't start and stop in"
6674 " the same entity\n");
6675 }
6676
6677 NEXT;
6678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6679 (ctxt->sax->elementDecl != NULL)) {
6680 if (content != NULL)
6681 content->parent = NULL;
6682 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6683 content);
6684 if ((content != NULL) && (content->parent == NULL)) {
6685 /*
6686 * this is a trick: if xmlAddElementDecl is called,
6687 * instead of copying the full tree it is plugged directly
6688 * if called from the parser. Avoid duplicating the
6689 * interfaces or change the API/ABI
6690 */
6691 xmlFreeDocElementContent(ctxt->myDoc, content);
6692 }
6693 } else if (content != NULL) {
6694 xmlFreeDocElementContent(ctxt->myDoc, content);
6695 }
6696 }
6697 }
6698 return(ret);
6699 }
6700
6701 /**
6702 * xmlParseConditionalSections
6703 * @ctxt: an XML parser context
6704 *
6705 * [61] conditionalSect ::= includeSect | ignoreSect
6706 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6707 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6708 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6709 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6710 */
6711
6712 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6713 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6714 int *inputIds = NULL;
6715 size_t inputIdsSize = 0;
6716 size_t depth = 0;
6717
6718 while (ctxt->instate != XML_PARSER_EOF) {
6719 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6720 int id = ctxt->input->id;
6721
6722 SKIP(3);
6723 SKIP_BLANKS;
6724
6725 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6726 SKIP(7);
6727 SKIP_BLANKS;
6728 if (RAW != '[') {
6729 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6730 xmlHaltParser(ctxt);
6731 goto error;
6732 }
6733 if (ctxt->input->id != id) {
6734 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6735 "All markup of the conditional section is"
6736 " not in the same entity\n");
6737 }
6738 NEXT;
6739
6740 if (inputIdsSize <= depth) {
6741 int *tmp;
6742
6743 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6744 tmp = (int *) xmlRealloc(inputIds,
6745 inputIdsSize * sizeof(int));
6746 if (tmp == NULL) {
6747 xmlErrMemory(ctxt, NULL);
6748 goto error;
6749 }
6750 inputIds = tmp;
6751 }
6752 inputIds[depth] = id;
6753 depth++;
6754 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6755 int state;
6756 xmlParserInputState instate;
6757 size_t ignoreDepth = 0;
6758
6759 SKIP(6);
6760 SKIP_BLANKS;
6761 if (RAW != '[') {
6762 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6763 xmlHaltParser(ctxt);
6764 goto error;
6765 }
6766 if (ctxt->input->id != id) {
6767 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6768 "All markup of the conditional section is"
6769 " not in the same entity\n");
6770 }
6771 NEXT;
6772
6773 /*
6774 * Parse up to the end of the conditional section but disable
6775 * SAX event generating DTD building in the meantime
6776 */
6777 state = ctxt->disableSAX;
6778 instate = ctxt->instate;
6779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6780 ctxt->instate = XML_PARSER_IGNORE;
6781
6782 while (RAW != 0) {
6783 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6784 SKIP(3);
6785 ignoreDepth++;
6786 /* Check for integer overflow */
6787 if (ignoreDepth == 0) {
6788 xmlErrMemory(ctxt, NULL);
6789 goto error;
6790 }
6791 } else if ((RAW == ']') && (NXT(1) == ']') &&
6792 (NXT(2) == '>')) {
6793 if (ignoreDepth == 0)
6794 break;
6795 SKIP(3);
6796 ignoreDepth--;
6797 } else {
6798 NEXT;
6799 }
6800 }
6801
6802 ctxt->disableSAX = state;
6803 ctxt->instate = instate;
6804
6805 if (RAW == 0) {
6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807 goto error;
6808 }
6809 if (ctxt->input->id != id) {
6810 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6811 "All markup of the conditional section is"
6812 " not in the same entity\n");
6813 }
6814 SKIP(3);
6815 } else {
6816 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6817 xmlHaltParser(ctxt);
6818 goto error;
6819 }
6820 } else if ((depth > 0) &&
6821 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6822 depth--;
6823 if (ctxt->input->id != inputIds[depth]) {
6824 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6825 "All markup of the conditional section is not"
6826 " in the same entity\n");
6827 }
6828 SKIP(3);
6829 } else {
6830 const xmlChar *check = CUR_PTR;
6831 unsigned int cons = ctxt->input->consumed;
6832
6833 xmlParseMarkupDecl(ctxt);
6834
6835 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6836 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6837 xmlHaltParser(ctxt);
6838 goto error;
6839 }
6840 }
6841
6842 if (depth == 0)
6843 break;
6844
6845 SKIP_BLANKS;
6846 GROW;
6847 }
6848
6849 error:
6850 xmlFree(inputIds);
6851 }
6852
6853 /**
6854 * xmlParseMarkupDecl:
6855 * @ctxt: an XML parser context
6856 *
6857 * parse Markup declarations
6858 *
6859 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6860 * NotationDecl | PI | Comment
6861 *
6862 * [ VC: Proper Declaration/PE Nesting ]
6863 * Parameter-entity replacement text must be properly nested with
6864 * markup declarations. That is to say, if either the first character
6865 * or the last character of a markup declaration (markupdecl above) is
6866 * contained in the replacement text for a parameter-entity reference,
6867 * both must be contained in the same replacement text.
6868 *
6869 * [ WFC: PEs in Internal Subset ]
6870 * In the internal DTD subset, parameter-entity references can occur
6871 * only where markup declarations can occur, not within markup declarations.
6872 * (This does not apply to references that occur in external parameter
6873 * entities or to the external subset.)
6874 */
6875 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6876 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6877 GROW;
6878 if (CUR == '<') {
6879 if (NXT(1) == '!') {
6880 switch (NXT(2)) {
6881 case 'E':
6882 if (NXT(3) == 'L')
6883 xmlParseElementDecl(ctxt);
6884 else if (NXT(3) == 'N')
6885 xmlParseEntityDecl(ctxt);
6886 break;
6887 case 'A':
6888 xmlParseAttributeListDecl(ctxt);
6889 break;
6890 case 'N':
6891 xmlParseNotationDecl(ctxt);
6892 break;
6893 case '-':
6894 xmlParseComment(ctxt);
6895 break;
6896 default:
6897 /* there is an error but it will be detected later */
6898 break;
6899 }
6900 } else if (NXT(1) == '?') {
6901 xmlParsePI(ctxt);
6902 }
6903 }
6904
6905 /*
6906 * detect requirement to exit there and act accordingly
6907 * and avoid having instate overridden later on
6908 */
6909 if (ctxt->instate == XML_PARSER_EOF)
6910 return;
6911
6912 ctxt->instate = XML_PARSER_DTD;
6913 }
6914
6915 /**
6916 * xmlParseTextDecl:
6917 * @ctxt: an XML parser context
6918 *
6919 * parse an XML declaration header for external entities
6920 *
6921 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6922 */
6923
6924 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6925 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6926 xmlChar *version;
6927 const xmlChar *encoding;
6928 int oldstate;
6929
6930 /*
6931 * We know that '<?xml' is here.
6932 */
6933 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6934 SKIP(5);
6935 } else {
6936 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6937 return;
6938 }
6939
6940 /* Avoid expansion of parameter entities when skipping blanks. */
6941 oldstate = ctxt->instate;
6942 ctxt->instate = XML_PARSER_START;
6943
6944 if (SKIP_BLANKS == 0) {
6945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 "Space needed after '<?xml'\n");
6947 }
6948
6949 /*
6950 * We may have the VersionInfo here.
6951 */
6952 version = xmlParseVersionInfo(ctxt);
6953 if (version == NULL)
6954 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6955 else {
6956 if (SKIP_BLANKS == 0) {
6957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6958 "Space needed here\n");
6959 }
6960 }
6961 ctxt->input->version = version;
6962
6963 /*
6964 * We must have the encoding declaration
6965 */
6966 encoding = xmlParseEncodingDecl(ctxt);
6967 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6968 /*
6969 * The XML REC instructs us to stop parsing right here
6970 */
6971 ctxt->instate = oldstate;
6972 return;
6973 }
6974 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976 "Missing encoding in text declaration\n");
6977 }
6978
6979 SKIP_BLANKS;
6980 if ((RAW == '?') && (NXT(1) == '>')) {
6981 SKIP(2);
6982 } else if (RAW == '>') {
6983 /* Deprecated old WD ... */
6984 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6985 NEXT;
6986 } else {
6987 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6988 MOVETO_ENDTAG(CUR_PTR);
6989 NEXT;
6990 }
6991
6992 ctxt->instate = oldstate;
6993 }
6994
6995 /**
6996 * xmlParseExternalSubset:
6997 * @ctxt: an XML parser context
6998 * @ExternalID: the external identifier
6999 * @SystemID: the system identifier (or URL)
7000 *
7001 * parse Markup declarations from an external subset
7002 *
7003 * [30] extSubset ::= textDecl? extSubsetDecl
7004 *
7005 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7006 */
7007 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7008 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7009 const xmlChar *SystemID) {
7010 xmlDetectSAX2(ctxt);
7011 GROW;
7012
7013 if ((ctxt->encoding == NULL) &&
7014 (ctxt->input->end - ctxt->input->cur >= 4)) {
7015 xmlChar start[4];
7016 xmlCharEncoding enc;
7017
7018 start[0] = RAW;
7019 start[1] = NXT(1);
7020 start[2] = NXT(2);
7021 start[3] = NXT(3);
7022 enc = xmlDetectCharEncoding(start, 4);
7023 if (enc != XML_CHAR_ENCODING_NONE)
7024 xmlSwitchEncoding(ctxt, enc);
7025 }
7026
7027 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7028 xmlParseTextDecl(ctxt);
7029 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7030 /*
7031 * The XML REC instructs us to stop parsing right here
7032 */
7033 xmlHaltParser(ctxt);
7034 return;
7035 }
7036 }
7037 if (ctxt->myDoc == NULL) {
7038 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7039 if (ctxt->myDoc == NULL) {
7040 xmlErrMemory(ctxt, "New Doc failed");
7041 return;
7042 }
7043 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7044 }
7045 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7046 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7047
7048 ctxt->instate = XML_PARSER_DTD;
7049 ctxt->external = 1;
7050 SKIP_BLANKS;
7051 while (((RAW == '<') && (NXT(1) == '?')) ||
7052 ((RAW == '<') && (NXT(1) == '!')) ||
7053 (RAW == '%')) {
7054 const xmlChar *check = CUR_PTR;
7055 unsigned int cons = ctxt->input->consumed;
7056
7057 GROW;
7058 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7059 xmlParseConditionalSections(ctxt);
7060 } else
7061 xmlParseMarkupDecl(ctxt);
7062 SKIP_BLANKS;
7063
7064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7065 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7066 break;
7067 }
7068 }
7069
7070 if (RAW != 0) {
7071 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7072 }
7073
7074 }
7075
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * parse and handle entity references in content, depending on the SAX
 * interface, this may end-up in a call to character() if this is a
 * CharRef, a predefined entity, if there is no reference() callback.
 * or if the parser was asked to switch to that mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * Outline of the steps performed below:
 *  - nothing happens unless the current character is '&';
 *  - a CharRef is decoded and delivered through characters(), or through
 *    reference() when the context charset is not UTF-8 and the value
 *    does not fit in 8 bits;
 *  - an entity reference is resolved with xmlParseEntityRef(); predefined
 *    entities are delivered through characters();
 *  - the entity content is parsed on first use (subject to the option
 *    checks below) and the result may be stored in ent->children;
 *  - finally the content is handed to the application, either through a
 *    reference() callback or by linking/copying nodes under ctxt->node.
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        /*
         * Read before xmlParseCharRef() is called, so that the original
         * notation (hex or decimal) is still known when the reference has
         * to be re-emitted below.
         */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fit on 8bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    /* No expansion is attempted once a well-formedness error was seen */
    if (!ctxt->wellFormed)
        return;
    /* Snapshot taken before the first-use parsing below updates it */
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        unsigned long oldnbent = ctxt->nbentities, diff;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings.
         * ent->checked ends up holding twice that (capped) count, with
         * the low bit set when the entity content contains a '<'.
         */
        diff = ctxt->nbentities - oldnbent + 1;
        if (diff > INT_MAX / 2)
            diff = INT_MAX / 2;
        ent->checked = diff * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
             (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                /*
                 * Prune it directly in the generated document
                 * except for single text nodes.
                 */
                if ((ctxt->replaceEntities == 0) ||
                    (ctxt->parseMode == XML_PARSE_READER) ||
                    ((list->type == XML_TEXT_NODE) &&
                     (list->next == NULL))) {
                    /* The entity keeps the parsed nodes as its own children */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = NULL;
                } else {
                    /*
                     * The nodes are re-parented to the current node;
                     * list is reset to the head so the code further down
                     * can tell this is the first occurrence.
                     */
                    ent->owner = 0;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ctxt->node;
                        list->doc = ctxt->myDoc;
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                    list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                        xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* Truncate the content of the entity that failed to parse */
            if (ent->content != NULL)
                ent->content[0] = 0;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        if (ent->checked == 0)
            ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
        /*
         * Not parsed again: add the count recorded in ent->checked
         * (stored doubled, hence the division) to the running total.
         */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * through parsing for first checking go through the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * From here on the entity has children (the case without children
     * returned above). When entities are not being substituted, only a
     * reference() callback is issued.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* Append a copy of each entity child to the current node */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* Detach the node before it is copied and moved */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* The copy goes to the entity, the original to the tree */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}
7510
7511 /**
7512 * xmlParseEntityRef:
7513 * @ctxt: an XML parser context
7514 *
7515 * parse ENTITY references declarations
7516 *
7517 * [68] EntityRef ::= '&' Name ';'
7518 *
7519 * [ WFC: Entity Declared ]
7520 * In a document without any DTD, a document with only an internal DTD
7521 * subset which contains no parameter entity references, or a document
7522 * with "standalone='yes'", the Name given in the entity reference
7523 * must match that in an entity declaration, except that well-formed
7524 * documents need not declare any of the following entities: amp, lt,
7525 * gt, apos, quot. The declaration of a parameter entity must precede
7526 * any reference to it. Similarly, the declaration of a general entity
7527 * must precede any reference to it which appears in a default value in an
7528 * attribute-list declaration. Note that if entities are declared in the
7529 * external subset or in external parameter entities, a non-validating
7530 * processor is not obligated to read and process their declarations;
7531 * for such documents, the rule that an entity must be declared is a
7532 * well-formedness constraint only if standalone='yes'.
7533 *
7534 * [ WFC: Parsed Entity ]
7535 * An entity reference must not contain the name of an unparsed entity
7536 *
7537 * Returns the xmlEntityPtr if found, or NULL otherwise.
7538 */
7539 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7540 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7541 const xmlChar *name;
7542 xmlEntityPtr ent = NULL;
7543
7544 GROW;
7545 if (ctxt->instate == XML_PARSER_EOF)
7546 return(NULL);
7547
7548 if (RAW != '&')
7549 return(NULL);
7550 NEXT;
7551 name = xmlParseName(ctxt);
7552 if (name == NULL) {
7553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554 "xmlParseEntityRef: no name\n");
7555 return(NULL);
7556 }
7557 if (RAW != ';') {
7558 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7559 return(NULL);
7560 }
7561 NEXT;
7562
7563 /*
7564 * Predefined entities override any extra definition
7565 */
7566 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7567 ent = xmlGetPredefinedEntity(name);
7568 if (ent != NULL)
7569 return(ent);
7570 }
7571
7572 /*
7573 * Increase the number of entity references parsed
7574 */
7575 ctxt->nbentities++;
7576
7577 /*
7578 * Ask first SAX for entity resolution, otherwise try the
7579 * entities which may have stored in the parser context.
7580 */
7581 if (ctxt->sax != NULL) {
7582 if (ctxt->sax->getEntity != NULL)
7583 ent = ctxt->sax->getEntity(ctxt->userData, name);
7584 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7585 (ctxt->options & XML_PARSE_OLDSAX))
7586 ent = xmlGetPredefinedEntity(name);
7587 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7588 (ctxt->userData==ctxt)) {
7589 ent = xmlSAX2GetEntity(ctxt, name);
7590 }
7591 }
7592 if (ctxt->instate == XML_PARSER_EOF)
7593 return(NULL);
7594 /*
7595 * [ WFC: Entity Declared ]
7596 * In a document without any DTD, a document with only an
7597 * internal DTD subset which contains no parameter entity
7598 * references, or a document with "standalone='yes'", the
7599 * Name given in the entity reference must match that in an
7600 * entity declaration, except that well-formed documents
7601 * need not declare any of the following entities: amp, lt,
7602 * gt, apos, quot.
7603 * The declaration of a parameter entity must precede any
7604 * reference to it.
7605 * Similarly, the declaration of a general entity must
7606 * precede any reference to it which appears in a default
7607 * value in an attribute-list declaration. Note that if
7608 * entities are declared in the external subset or in
7609 * external parameter entities, a non-validating processor
7610 * is not obligated to read and process their declarations;
7611 * for such documents, the rule that an entity must be
7612 * declared is a well-formedness constraint only if
7613 * standalone='yes'.
7614 */
7615 if (ent == NULL) {
7616 if ((ctxt->standalone == 1) ||
7617 ((ctxt->hasExternalSubset == 0) &&
7618 (ctxt->hasPErefs == 0))) {
7619 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7620 "Entity '%s' not defined\n", name);
7621 } else {
7622 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7623 "Entity '%s' not defined\n", name);
7624 if ((ctxt->inSubset == 0) &&
7625 (ctxt->sax != NULL) &&
7626 (ctxt->sax->reference != NULL)) {
7627 ctxt->sax->reference(ctxt->userData, name);
7628 }
7629 }
7630 xmlParserEntityCheck(ctxt, 0, ent, 0);
7631 ctxt->valid = 0;
7632 }
7633
7634 /*
7635 * [ WFC: Parsed Entity ]
7636 * An entity reference must not contain the name of an
7637 * unparsed entity
7638 */
7639 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7640 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7641 "Entity reference to unparsed entity %s\n", name);
7642 }
7643
7644 /*
7645 * [ WFC: No External Entity References ]
7646 * Attribute values cannot contain direct or indirect
7647 * entity references to external entities.
7648 */
7649 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7650 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7651 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7652 "Attribute references external entity '%s'\n", name);
7653 }
7654 /*
7655 * [ WFC: No < in Attribute Values ]
7656 * The replacement text of any entity referred to directly or
7657 * indirectly in an attribute value (other than "<") must
7658 * not contain a <.
7659 */
7660 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7661 (ent != NULL) &&
7662 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7663 if (((ent->checked & 1) || (ent->checked == 0)) &&
7664 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7665 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7666 "'<' in entity '%s' is not allowed in attributes values\n", name);
7667 }
7668 }
7669
7670 /*
7671 * Internal check, no parameter entities here ...
7672 */
7673 else {
7674 switch (ent->etype) {
7675 case XML_INTERNAL_PARAMETER_ENTITY:
7676 case XML_EXTERNAL_PARAMETER_ENTITY:
7677 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7678 "Attempt to reference the parameter entity '%s'\n",
7679 name);
7680 break;
7681 default:
7682 break;
7683 }
7684 }
7685
7686 /*
7687 * [ WFC: No Recursion ]
7688 * A parsed entity must not contain a recursive reference
7689 * to itself, either directly or indirectly.
7690 * Done somewhere else
7691 */
7692 return(ent);
7693 }
7694
7695 /**
7696 * xmlParseStringEntityRef:
7697 * @ctxt: an XML parser context
7698 * @str: a pointer to an index in the string
7699 *
7700 * parse ENTITY references declarations, but this version parses it from
7701 * a string value.
7702 *
7703 * [68] EntityRef ::= '&' Name ';'
7704 *
7705 * [ WFC: Entity Declared ]
7706 * In a document without any DTD, a document with only an internal DTD
7707 * subset which contains no parameter entity references, or a document
7708 * with "standalone='yes'", the Name given in the entity reference
7709 * must match that in an entity declaration, except that well-formed
7710 * documents need not declare any of the following entities: amp, lt,
7711 * gt, apos, quot. The declaration of a parameter entity must precede
7712 * any reference to it. Similarly, the declaration of a general entity
7713 * must precede any reference to it which appears in a default value in an
7714 * attribute-list declaration. Note that if entities are declared in the
7715 * external subset or in external parameter entities, a non-validating
7716 * processor is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be declared is a
7718 * well-formedness constraint only if standalone='yes'.
7719 *
7720 * [ WFC: Parsed Entity ]
7721 * An entity reference must not contain the name of an unparsed entity
7722 *
7723 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7724 * is updated to the current location in the string.
7725 */
7726 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7727 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7728 xmlChar *name;
7729 const xmlChar *ptr;
7730 xmlChar cur;
7731 xmlEntityPtr ent = NULL;
7732
7733 if ((str == NULL) || (*str == NULL))
7734 return(NULL);
7735 ptr = *str;
7736 cur = *ptr;
7737 if (cur != '&')
7738 return(NULL);
7739
7740 ptr++;
7741 name = xmlParseStringName(ctxt, &ptr);
7742 if (name == NULL) {
7743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7744 "xmlParseStringEntityRef: no name\n");
7745 *str = ptr;
7746 return(NULL);
7747 }
7748 if (*ptr != ';') {
7749 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7750 xmlFree(name);
7751 *str = ptr;
7752 return(NULL);
7753 }
7754 ptr++;
7755
7756
7757 /*
7758 * Predefined entities override any extra definition
7759 */
7760 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7761 ent = xmlGetPredefinedEntity(name);
7762 if (ent != NULL) {
7763 xmlFree(name);
7764 *str = ptr;
7765 return(ent);
7766 }
7767 }
7768
7769 /*
7770 * Increase the number of entity references parsed
7771 */
7772 ctxt->nbentities++;
7773
7774 /*
7775 * Ask first SAX for entity resolution, otherwise try the
7776 * entities which may have stored in the parser context.
7777 */
7778 if (ctxt->sax != NULL) {
7779 if (ctxt->sax->getEntity != NULL)
7780 ent = ctxt->sax->getEntity(ctxt->userData, name);
7781 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7782 ent = xmlGetPredefinedEntity(name);
7783 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7784 ent = xmlSAX2GetEntity(ctxt, name);
7785 }
7786 }
7787 if (ctxt->instate == XML_PARSER_EOF) {
7788 xmlFree(name);
7789 return(NULL);
7790 }
7791
7792 /*
7793 * [ WFC: Entity Declared ]
7794 * In a document without any DTD, a document with only an
7795 * internal DTD subset which contains no parameter entity
7796 * references, or a document with "standalone='yes'", the
7797 * Name given in the entity reference must match that in an
7798 * entity declaration, except that well-formed documents
7799 * need not declare any of the following entities: amp, lt,
7800 * gt, apos, quot.
7801 * The declaration of a parameter entity must precede any
7802 * reference to it.
7803 * Similarly, the declaration of a general entity must
7804 * precede any reference to it which appears in a default
7805 * value in an attribute-list declaration. Note that if
7806 * entities are declared in the external subset or in
7807 * external parameter entities, a non-validating processor
7808 * is not obligated to read and process their declarations;
7809 * for such documents, the rule that an entity must be
7810 * declared is a well-formedness constraint only if
7811 * standalone='yes'.
7812 */
7813 if (ent == NULL) {
7814 if ((ctxt->standalone == 1) ||
7815 ((ctxt->hasExternalSubset == 0) &&
7816 (ctxt->hasPErefs == 0))) {
7817 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7818 "Entity '%s' not defined\n", name);
7819 } else {
7820 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7821 "Entity '%s' not defined\n",
7822 name);
7823 }
7824 xmlParserEntityCheck(ctxt, 0, ent, 0);
7825 /* TODO ? check regressions ctxt->valid = 0; */
7826 }
7827
7828 /*
7829 * [ WFC: Parsed Entity ]
7830 * An entity reference must not contain the name of an
7831 * unparsed entity
7832 */
7833 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7834 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7835 "Entity reference to unparsed entity %s\n", name);
7836 }
7837
7838 /*
7839 * [ WFC: No External Entity References ]
7840 * Attribute values cannot contain direct or indirect
7841 * entity references to external entities.
7842 */
7843 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7844 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7845 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7846 "Attribute references external entity '%s'\n", name);
7847 }
7848 /*
7849 * [ WFC: No < in Attribute Values ]
7850 * The replacement text of any entity referred to directly or
7851 * indirectly in an attribute value (other than "<") must
7852 * not contain a <.
7853 */
7854 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7855 (ent != NULL) && (ent->content != NULL) &&
7856 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7857 (xmlStrchr(ent->content, '<'))) {
7858 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7859 "'<' in entity '%s' is not allowed in attributes values\n",
7860 name);
7861 }
7862
7863 /*
7864 * Internal check, no parameter entities here ...
7865 */
7866 else {
7867 switch (ent->etype) {
7868 case XML_INTERNAL_PARAMETER_ENTITY:
7869 case XML_EXTERNAL_PARAMETER_ENTITY:
7870 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7871 "Attempt to reference the parameter entity '%s'\n",
7872 name);
7873 break;
7874 default:
7875 break;
7876 }
7877 }
7878
7879 /*
7880 * [ WFC: No Recursion ]
7881 * A parsed entity must not contain a recursive reference
7882 * to itself, either directly or indirectly.
7883 * Done somewhere else
7884 */
7885
7886 xmlFree(name);
7887 *str = ptr;
7888 return(ent);
7889 }
7890
7891 /**
7892 * xmlParsePEReference:
7893 * @ctxt: an XML parser context
7894 *
7895 * parse PEReference declarations
7896 * The entity content is handled directly by pushing it's content as
7897 * a new input stream.
7898 *
7899 * [69] PEReference ::= '%' Name ';'
7900 *
7901 * [ WFC: No Recursion ]
7902 * A parsed entity must not contain a recursive
7903 * reference to itself, either directly or indirectly.
7904 *
7905 * [ WFC: Entity Declared ]
7906 * In a document without any DTD, a document with only an internal DTD
7907 * subset which contains no parameter entity references, or a document
7908 * with "standalone='yes'", ... ... The declaration of a parameter
7909 * entity must precede any reference to it...
7910 *
7911 * [ VC: Entity Declared ]
7912 * In a document with an external subset or external parameter entities
7913 * with "standalone='no'", ... ... The declaration of a parameter entity
7914 * must precede any reference to it...
7915 *
7916 * [ WFC: In DTD ]
7917 * Parameter-entity references may only appear in the DTD.
7918 * NOTE: misleading but this is handled.
7919 */
7920 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7921 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7922 {
7923 const xmlChar *name;
7924 xmlEntityPtr entity = NULL;
7925 xmlParserInputPtr input;
7926
7927 if (RAW != '%')
7928 return;
7929 NEXT;
7930 name = xmlParseName(ctxt);
7931 if (name == NULL) {
7932 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7933 return;
7934 }
7935 if (xmlParserDebugEntities)
7936 xmlGenericError(xmlGenericErrorContext,
7937 "PEReference: %s\n", name);
7938 if (RAW != ';') {
7939 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7940 return;
7941 }
7942
7943 NEXT;
7944
7945 /*
7946 * Increase the number of entity references parsed
7947 */
7948 ctxt->nbentities++;
7949
7950 /*
7951 * Request the entity from SAX
7952 */
7953 if ((ctxt->sax != NULL) &&
7954 (ctxt->sax->getParameterEntity != NULL))
7955 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7956 if (ctxt->instate == XML_PARSER_EOF)
7957 return;
7958 if (entity == NULL) {
7959 /*
7960 * [ WFC: Entity Declared ]
7961 * In a document without any DTD, a document with only an
7962 * internal DTD subset which contains no parameter entity
7963 * references, or a document with "standalone='yes'", ...
7964 * ... The declaration of a parameter entity must precede
7965 * any reference to it...
7966 */
7967 if ((ctxt->standalone == 1) ||
7968 ((ctxt->hasExternalSubset == 0) &&
7969 (ctxt->hasPErefs == 0))) {
7970 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7971 "PEReference: %%%s; not found\n",
7972 name);
7973 } else {
7974 /*
7975 * [ VC: Entity Declared ]
7976 * In a document with an external subset or external
7977 * parameter entities with "standalone='no'", ...
7978 * ... The declaration of a parameter entity must
7979 * precede any reference to it...
7980 */
7981 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7982 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 "PEReference: %%%s; not found\n",
7984 name, NULL);
7985 } else
7986 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7987 "PEReference: %%%s; not found\n",
7988 name, NULL);
7989 ctxt->valid = 0;
7990 }
7991 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7992 } else {
7993 /*
7994 * Internal checking in case the entity quest barfed
7995 */
7996 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7997 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7998 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7999 "Internal: %%%s; is not a parameter entity\n",
8000 name, NULL);
8001 } else {
8002 xmlChar start[4];
8003 xmlCharEncoding enc;
8004
8005 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8006 return;
8007
8008 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8009 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8010 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8011 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8012 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8013 (ctxt->replaceEntities == 0) &&
8014 (ctxt->validate == 0))
8015 return;
8016
8017 input = xmlNewEntityInputStream(ctxt, entity);
8018 if (xmlPushInput(ctxt, input) < 0) {
8019 xmlFreeInputStream(input);
8020 return;
8021 }
8022
8023 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8024 /*
8025 * Get the 4 first bytes and decode the charset
8026 * if enc != XML_CHAR_ENCODING_NONE
8027 * plug some encoding conversion routines.
8028 * Note that, since we may have some non-UTF8
8029 * encoding (like UTF16, bug 135229), the 'length'
8030 * is not known, but we can calculate based upon
8031 * the amount of data in the buffer.
8032 */
8033 GROW
8034 if (ctxt->instate == XML_PARSER_EOF)
8035 return;
8036 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8037 start[0] = RAW;
8038 start[1] = NXT(1);
8039 start[2] = NXT(2);
8040 start[3] = NXT(3);
8041 enc = xmlDetectCharEncoding(start, 4);
8042 if (enc != XML_CHAR_ENCODING_NONE) {
8043 xmlSwitchEncoding(ctxt, enc);
8044 }
8045 }
8046
8047 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8048 (IS_BLANK_CH(NXT(5)))) {
8049 xmlParseTextDecl(ctxt);
8050 }
8051 }
8052 }
8053 }
8054 ctxt->hasPErefs = 1;
8055 }
8056
8057 /**
8058 * xmlLoadEntityContent:
8059 * @ctxt: an XML parser context
8060 * @entity: an unloaded system entity
8061 *
8062 * Load the original content of the given system entity from the
8063 * ExternalID/SystemID given. This is to be used for Included in Literal
8064 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8065 *
8066 * Returns 0 in case of success and -1 in case of failure
8067 */
8068 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8069 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8070 xmlParserInputPtr input;
8071 xmlBufferPtr buf;
8072 int l, c;
8073 int count = 0;
8074
8075 if ((ctxt == NULL) || (entity == NULL) ||
8076 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8077 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8078 (entity->content != NULL)) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlLoadEntityContent parameter error");
8081 return(-1);
8082 }
8083
8084 if (xmlParserDebugEntities)
8085 xmlGenericError(xmlGenericErrorContext,
8086 "Reading %s entity content input\n", entity->name);
8087
8088 buf = xmlBufferCreate();
8089 if (buf == NULL) {
8090 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8091 "xmlLoadEntityContent parameter error");
8092 return(-1);
8093 }
8094
8095 input = xmlNewEntityInputStream(ctxt, entity);
8096 if (input == NULL) {
8097 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8098 "xmlLoadEntityContent input error");
8099 xmlBufferFree(buf);
8100 return(-1);
8101 }
8102
8103 /*
8104 * Push the entity as the current input, read char by char
8105 * saving to the buffer until the end of the entity or an error
8106 */
8107 if (xmlPushInput(ctxt, input) < 0) {
8108 xmlBufferFree(buf);
8109 return(-1);
8110 }
8111
8112 GROW;
8113 c = CUR_CHAR(l);
8114 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8115 (IS_CHAR(c))) {
8116 xmlBufferAdd(buf, ctxt->input->cur, l);
8117 if (count++ > XML_PARSER_CHUNK_SIZE) {
8118 count = 0;
8119 GROW;
8120 if (ctxt->instate == XML_PARSER_EOF) {
8121 xmlBufferFree(buf);
8122 return(-1);
8123 }
8124 }
8125 NEXTL(l);
8126 c = CUR_CHAR(l);
8127 if (c == 0) {
8128 count = 0;
8129 GROW;
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlBufferFree(buf);
8132 return(-1);
8133 }
8134 c = CUR_CHAR(l);
8135 }
8136 }
8137
8138 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8139 xmlPopInput(ctxt);
8140 } else if (!IS_CHAR(c)) {
8141 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8142 "xmlLoadEntityContent: invalid char value %d\n",
8143 c);
8144 xmlBufferFree(buf);
8145 return(-1);
8146 }
8147 entity->content = buf->content;
8148 buf->content = NULL;
8149 xmlBufferFree(buf);
8150
8151 return(0);
8152 }
8153
8154 /**
8155 * xmlParseStringPEReference:
8156 * @ctxt: an XML parser context
8157 * @str: a pointer to an index in the string
8158 *
8159 * parse PEReference declarations
8160 *
8161 * [69] PEReference ::= '%' Name ';'
8162 *
8163 * [ WFC: No Recursion ]
8164 * A parsed entity must not contain a recursive
8165 * reference to itself, either directly or indirectly.
8166 *
8167 * [ WFC: Entity Declared ]
8168 * In a document without any DTD, a document with only an internal DTD
8169 * subset which contains no parameter entity references, or a document
8170 * with "standalone='yes'", ... ... The declaration of a parameter
8171 * entity must precede any reference to it...
8172 *
8173 * [ VC: Entity Declared ]
8174 * In a document with an external subset or external parameter entities
8175 * with "standalone='no'", ... ... The declaration of a parameter entity
8176 * must precede any reference to it...
8177 *
8178 * [ WFC: In DTD ]
8179 * Parameter-entity references may only appear in the DTD.
8180 * NOTE: misleading but this is handled.
8181 *
8182 * Returns the string of the entity content.
8183 * str is updated to the current value of the index
8184 */
8185 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8186 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8187 const xmlChar *ptr;
8188 xmlChar cur;
8189 xmlChar *name;
8190 xmlEntityPtr entity = NULL;
8191
8192 if ((str == NULL) || (*str == NULL)) return(NULL);
8193 ptr = *str;
8194 cur = *ptr;
8195 if (cur != '%')
8196 return(NULL);
8197 ptr++;
8198 name = xmlParseStringName(ctxt, &ptr);
8199 if (name == NULL) {
8200 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8201 "xmlParseStringPEReference: no name\n");
8202 *str = ptr;
8203 return(NULL);
8204 }
8205 cur = *ptr;
8206 if (cur != ';') {
8207 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8208 xmlFree(name);
8209 *str = ptr;
8210 return(NULL);
8211 }
8212 ptr++;
8213
8214 /*
8215 * Increase the number of entity references parsed
8216 */
8217 ctxt->nbentities++;
8218
8219 /*
8220 * Request the entity from SAX
8221 */
8222 if ((ctxt->sax != NULL) &&
8223 (ctxt->sax->getParameterEntity != NULL))
8224 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8225 if (ctxt->instate == XML_PARSER_EOF) {
8226 xmlFree(name);
8227 *str = ptr;
8228 return(NULL);
8229 }
8230 if (entity == NULL) {
8231 /*
8232 * [ WFC: Entity Declared ]
8233 * In a document without any DTD, a document with only an
8234 * internal DTD subset which contains no parameter entity
8235 * references, or a document with "standalone='yes'", ...
8236 * ... The declaration of a parameter entity must precede
8237 * any reference to it...
8238 */
8239 if ((ctxt->standalone == 1) ||
8240 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8241 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8242 "PEReference: %%%s; not found\n", name);
8243 } else {
8244 /*
8245 * [ VC: Entity Declared ]
8246 * In a document with an external subset or external
8247 * parameter entities with "standalone='no'", ...
8248 * ... The declaration of a parameter entity must
8249 * precede any reference to it...
8250 */
8251 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8252 "PEReference: %%%s; not found\n",
8253 name, NULL);
8254 ctxt->valid = 0;
8255 }
8256 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8257 } else {
8258 /*
8259 * Internal checking in case the entity quest barfed
8260 */
8261 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8262 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8263 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8264 "%%%s; is not a parameter entity\n",
8265 name, NULL);
8266 }
8267 }
8268 ctxt->hasPErefs = 1;
8269 xmlFree(name);
8270 *str = ptr;
8271 return(entity);
8272 }
8273
8274 /**
8275 * xmlParseDocTypeDecl:
8276 * @ctxt: an XML parser context
8277 *
8278 * parse a DOCTYPE declaration
8279 *
8280 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8281 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8282 *
8283 * [ VC: Root Element Type ]
8284 * The Name in the document type declaration must match the element
8285 * type of the root element.
8286 */
8287
8288 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8289 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8290 const xmlChar *name = NULL;
8291 xmlChar *ExternalID = NULL;
8292 xmlChar *URI = NULL;
8293
8294 /*
8295 * We know that '<!DOCTYPE' has been detected.
8296 */
8297 SKIP(9);
8298
8299 SKIP_BLANKS;
8300
8301 /*
8302 * Parse the DOCTYPE name.
8303 */
8304 name = xmlParseName(ctxt);
8305 if (name == NULL) {
8306 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8307 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8308 }
8309 ctxt->intSubName = name;
8310
8311 SKIP_BLANKS;
8312
8313 /*
8314 * Check for SystemID and ExternalID
8315 */
8316 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8317
8318 if ((URI != NULL) || (ExternalID != NULL)) {
8319 ctxt->hasExternalSubset = 1;
8320 }
8321 ctxt->extSubURI = URI;
8322 ctxt->extSubSystem = ExternalID;
8323
8324 SKIP_BLANKS;
8325
8326 /*
8327 * Create and update the internal subset.
8328 */
8329 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8330 (!ctxt->disableSAX))
8331 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8332 if (ctxt->instate == XML_PARSER_EOF)
8333 return;
8334
8335 /*
8336 * Is there any internal subset declarations ?
8337 * they are handled separately in xmlParseInternalSubset()
8338 */
8339 if (RAW == '[')
8340 return;
8341
8342 /*
8343 * We should be at the end of the DOCTYPE declaration.
8344 */
8345 if (RAW != '>') {
8346 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8347 }
8348 NEXT;
8349 }
8350
8351 /**
8352 * xmlParseInternalSubset:
8353 * @ctxt: an XML parser context
8354 *
8355 * parse the internal subset declaration
8356 *
8357 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8358 */
8359
8360 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8361 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8362 /*
8363 * Is there any DTD definition ?
8364 */
8365 if (RAW == '[') {
8366 int baseInputNr = ctxt->inputNr;
8367 ctxt->instate = XML_PARSER_DTD;
8368 NEXT;
8369 /*
8370 * Parse the succession of Markup declarations and
8371 * PEReferences.
8372 * Subsequence (markupdecl | PEReference | S)*
8373 */
8374 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8375 (ctxt->instate != XML_PARSER_EOF)) {
8376 const xmlChar *check = CUR_PTR;
8377 unsigned int cons = ctxt->input->consumed;
8378
8379 SKIP_BLANKS;
8380 xmlParseMarkupDecl(ctxt);
8381 xmlParsePEReference(ctxt);
8382
8383 /*
8384 * Conditional sections are allowed from external entities included
8385 * by PE References in the internal subset.
8386 */
8387 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8388 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8389 xmlParseConditionalSections(ctxt);
8390 }
8391
8392 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8393 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8394 "xmlParseInternalSubset: error detected in Markup declaration\n");
8395 if (ctxt->inputNr > baseInputNr)
8396 xmlPopInput(ctxt);
8397 else
8398 break;
8399 }
8400 }
8401 if (RAW == ']') {
8402 NEXT;
8403 SKIP_BLANKS;
8404 }
8405 }
8406
8407 /*
8408 * We should be at the end of the DOCTYPE declaration.
8409 */
8410 if (RAW != '>') {
8411 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8412 return;
8413 }
8414 NEXT;
8415 }
8416
8417 #ifdef LIBXML_SAX1_ENABLED
8418 /**
8419 * xmlParseAttribute:
8420 * @ctxt: an XML parser context
8421 * @value: a xmlChar ** used to store the value of the attribute
8422 *
8423 * parse an attribute
8424 *
8425 * [41] Attribute ::= Name Eq AttValue
8426 *
8427 * [ WFC: No External Entity References ]
8428 * Attribute values cannot contain direct or indirect entity references
8429 * to external entities.
8430 *
8431 * [ WFC: No < in Attribute Values ]
8432 * The replacement text of any entity referred to directly or indirectly in
8433 * an attribute value (other than "<") must not contain a <.
8434 *
8435 * [ VC: Attribute Value Type ]
8436 * The attribute must have been declared; the value must be of the type
8437 * declared for it.
8438 *
8439 * [25] Eq ::= S? '=' S?
8440 *
8441 * With namespace:
8442 *
8443 * [NS 11] Attribute ::= QName Eq AttValue
8444 *
8445 * Also the case QName == xmlns:??? is handled independently as a namespace
8446 * definition.
8447 *
8448 * Returns the attribute name, and the value in *value.
8449 */
8450
8451 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8452 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8453 const xmlChar *name;
8454 xmlChar *val;
8455
8456 *value = NULL;
8457 GROW;
8458 name = xmlParseName(ctxt);
8459 if (name == NULL) {
8460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8461 "error parsing attribute name\n");
8462 return(NULL);
8463 }
8464
8465 /*
8466 * read the value
8467 */
8468 SKIP_BLANKS;
8469 if (RAW == '=') {
8470 NEXT;
8471 SKIP_BLANKS;
8472 val = xmlParseAttValue(ctxt);
8473 ctxt->instate = XML_PARSER_CONTENT;
8474 } else {
8475 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8476 "Specification mandates value for attribute %s\n", name);
8477 return(NULL);
8478 }
8479
8480 /*
8481 * Check that xml:lang conforms to the specification
8482 * No more registered as an error, just generate a warning now
8483 * since this was deprecated in XML second edition
8484 */
8485 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8486 if (!xmlCheckLanguageID(val)) {
8487 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8488 "Malformed value for xml:lang : %s\n",
8489 val, NULL);
8490 }
8491 }
8492
8493 /*
8494 * Check that xml:space conforms to the specification
8495 */
8496 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8497 if (xmlStrEqual(val, BAD_CAST "default"))
8498 *(ctxt->space) = 0;
8499 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8500 *(ctxt->space) = 1;
8501 else {
8502 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8503 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8504 val, NULL);
8505 }
8506 }
8507
8508 *value = val;
8509 return(name);
8510 }
8511
8512 /**
8513 * xmlParseStartTag:
8514 * @ctxt: an XML parser context
8515 *
8516 * parse a start of tag either for rule element or
8517 * EmptyElement. In both case we don't parse the tag closing chars.
8518 *
8519 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8520 *
8521 * [ WFC: Unique Att Spec ]
8522 * No attribute name may appear more than once in the same start-tag or
8523 * empty-element tag.
8524 *
8525 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8526 *
8527 * [ WFC: Unique Att Spec ]
8528 * No attribute name may appear more than once in the same start-tag or
8529 * empty-element tag.
8530 *
8531 * With namespace:
8532 *
8533 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8534 *
8535 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8536 *
8537 * Returns the element name parsed
8538 */
8539
8540 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8541 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8542 const xmlChar *name;
8543 const xmlChar *attname;
8544 xmlChar *attvalue;
8545 const xmlChar **atts = ctxt->atts;
8546 int nbatts = 0;
8547 int maxatts = ctxt->maxatts;
8548 int i;
8549
8550 if (RAW != '<') return(NULL);
8551 NEXT1;
8552
8553 name = xmlParseName(ctxt);
8554 if (name == NULL) {
8555 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8556 "xmlParseStartTag: invalid element name\n");
8557 return(NULL);
8558 }
8559
8560 /*
8561 * Now parse the attributes, it ends up with the ending
8562 *
8563 * (S Attribute)* S?
8564 */
8565 SKIP_BLANKS;
8566 GROW;
8567
8568 while (((RAW != '>') &&
8569 ((RAW != '/') || (NXT(1) != '>')) &&
8570 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8571 const xmlChar *q = CUR_PTR;
8572 unsigned int cons = ctxt->input->consumed;
8573
8574 attname = xmlParseAttribute(ctxt, &attvalue);
8575 if ((attname != NULL) && (attvalue != NULL)) {
8576 /*
8577 * [ WFC: Unique Att Spec ]
8578 * No attribute name may appear more than once in the same
8579 * start-tag or empty-element tag.
8580 */
8581 for (i = 0; i < nbatts;i += 2) {
8582 if (xmlStrEqual(atts[i], attname)) {
8583 xmlErrAttributeDup(ctxt, NULL, attname);
8584 xmlFree(attvalue);
8585 goto failed;
8586 }
8587 }
8588 /*
8589 * Add the pair to atts
8590 */
8591 if (atts == NULL) {
8592 maxatts = 22; /* allow for 10 attrs by default */
8593 atts = (const xmlChar **)
8594 xmlMalloc(maxatts * sizeof(xmlChar *));
8595 if (atts == NULL) {
8596 xmlErrMemory(ctxt, NULL);
8597 if (attvalue != NULL)
8598 xmlFree(attvalue);
8599 goto failed;
8600 }
8601 ctxt->atts = atts;
8602 ctxt->maxatts = maxatts;
8603 } else if (nbatts + 4 > maxatts) {
8604 const xmlChar **n;
8605
8606 maxatts *= 2;
8607 n = (const xmlChar **) xmlRealloc((void *) atts,
8608 maxatts * sizeof(const xmlChar *));
8609 if (n == NULL) {
8610 xmlErrMemory(ctxt, NULL);
8611 if (attvalue != NULL)
8612 xmlFree(attvalue);
8613 goto failed;
8614 }
8615 atts = n;
8616 ctxt->atts = atts;
8617 ctxt->maxatts = maxatts;
8618 }
8619 atts[nbatts++] = attname;
8620 atts[nbatts++] = attvalue;
8621 atts[nbatts] = NULL;
8622 atts[nbatts + 1] = NULL;
8623 } else {
8624 if (attvalue != NULL)
8625 xmlFree(attvalue);
8626 }
8627
8628 failed:
8629
8630 GROW
8631 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8632 break;
8633 if (SKIP_BLANKS == 0) {
8634 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8635 "attributes construct error\n");
8636 }
8637 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8638 (attname == NULL) && (attvalue == NULL)) {
8639 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8640 "xmlParseStartTag: problem parsing attributes\n");
8641 break;
8642 }
8643 SHRINK;
8644 GROW;
8645 }
8646
8647 /*
8648 * SAX: Start of Element !
8649 */
8650 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8651 (!ctxt->disableSAX)) {
8652 if (nbatts > 0)
8653 ctxt->sax->startElement(ctxt->userData, name, atts);
8654 else
8655 ctxt->sax->startElement(ctxt->userData, name, NULL);
8656 }
8657
8658 if (atts != NULL) {
8659 /* Free only the content strings */
8660 for (i = 1;i < nbatts;i+=2)
8661 if (atts[i] != NULL)
8662 xmlFree((xmlChar *) atts[i]);
8663 }
8664 return(name);
8665 }
8666
8667 /**
8668 * xmlParseEndTag1:
8669 * @ctxt: an XML parser context
8670 * @line: line of the start tag
8671 * @nsNr: number of namespaces on the start tag
8672 *
8673 * parse an end of tag
8674 *
8675 * [42] ETag ::= '</' Name S? '>'
8676 *
8677 * With namespace
8678 *
8679 * [NS 9] ETag ::= '</' QName S? '>'
8680 */
8681
8682 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8683 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8684 const xmlChar *name;
8685
8686 GROW;
8687 if ((RAW != '<') || (NXT(1) != '/')) {
8688 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8689 "xmlParseEndTag: '</' not found\n");
8690 return;
8691 }
8692 SKIP(2);
8693
8694 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8695
8696 /*
8697 * We should definitely be at the ending "S? '>'" part
8698 */
8699 GROW;
8700 SKIP_BLANKS;
8701 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8702 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8703 } else
8704 NEXT1;
8705
8706 /*
8707 * [ WFC: Element Type Match ]
8708 * The Name in an element's end-tag must match the element type in the
8709 * start-tag.
8710 *
8711 */
8712 if (name != (xmlChar*)1) {
8713 if (name == NULL) name = BAD_CAST "unparsable";
8714 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8715 "Opening and ending tag mismatch: %s line %d and %s\n",
8716 ctxt->name, line, name);
8717 }
8718
8719 /*
8720 * SAX: End of Tag
8721 */
8722 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8723 (!ctxt->disableSAX))
8724 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8725
8726 namePop(ctxt);
8727 spacePop(ctxt);
8728 return;
8729 }
8730
8731 /**
8732 * xmlParseEndTag:
8733 * @ctxt: an XML parser context
8734 *
8735 * parse an end of tag
8736 *
8737 * [42] ETag ::= '</' Name S? '>'
8738 *
8739 * With namespace
8740 *
8741 * [NS 9] ETag ::= '</' QName S? '>'
8742 */
8743
8744 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8745 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8746 xmlParseEndTag1(ctxt, 0);
8747 }
8748 #endif /* LIBXML_SAX1_ENABLED */
8749
8750 /************************************************************************
8751 * *
8752 * SAX 2 specific operations *
8753 * *
8754 ************************************************************************/
8755
8756 /*
8757 * xmlGetNamespace:
8758 * @ctxt: an XML parser context
8759 * @prefix: the prefix to lookup
8760 *
8761 * Lookup the namespace name for the @prefix (which ca be NULL)
8762 * The prefix must come from the @ctxt->dict dictionary
8763 *
8764 * Returns the namespace name or NULL if not bound
8765 */
8766 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8767 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8768 int i;
8769
8770 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8771 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8772 if (ctxt->nsTab[i] == prefix) {
8773 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8774 return(NULL);
8775 return(ctxt->nsTab[i + 1]);
8776 }
8777 return(NULL);
8778 }
8779
8780 /**
8781 * xmlParseQName:
8782 * @ctxt: an XML parser context
8783 * @prefix: pointer to store the prefix part
8784 *
8785 * parse an XML Namespace QName
8786 *
8787 * [6] QName ::= (Prefix ':')? LocalPart
8788 * [7] Prefix ::= NCName
8789 * [8] LocalPart ::= NCName
8790 *
8791 * Returns the Name parsed or NULL
8792 */
8793
8794 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8795 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8796 const xmlChar *l, *p;
8797
8798 GROW;
8799
8800 l = xmlParseNCName(ctxt);
8801 if (l == NULL) {
8802 if (CUR == ':') {
8803 l = xmlParseName(ctxt);
8804 if (l != NULL) {
8805 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8806 "Failed to parse QName '%s'\n", l, NULL, NULL);
8807 *prefix = NULL;
8808 return(l);
8809 }
8810 }
8811 return(NULL);
8812 }
8813 if (CUR == ':') {
8814 NEXT;
8815 p = l;
8816 l = xmlParseNCName(ctxt);
8817 if (l == NULL) {
8818 xmlChar *tmp;
8819
8820 if (ctxt->instate == XML_PARSER_EOF)
8821 return(NULL);
8822 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8823 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8824 l = xmlParseNmtoken(ctxt);
8825 if (l == NULL) {
8826 if (ctxt->instate == XML_PARSER_EOF)
8827 return(NULL);
8828 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8829 } else {
8830 tmp = xmlBuildQName(l, p, NULL, 0);
8831 xmlFree((char *)l);
8832 }
8833 p = xmlDictLookup(ctxt->dict, tmp, -1);
8834 if (tmp != NULL) xmlFree(tmp);
8835 *prefix = NULL;
8836 return(p);
8837 }
8838 if (CUR == ':') {
8839 xmlChar *tmp;
8840
8841 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8842 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8843 NEXT;
8844 tmp = (xmlChar *) xmlParseName(ctxt);
8845 if (tmp != NULL) {
8846 tmp = xmlBuildQName(tmp, l, NULL, 0);
8847 l = xmlDictLookup(ctxt->dict, tmp, -1);
8848 if (tmp != NULL) xmlFree(tmp);
8849 *prefix = p;
8850 return(l);
8851 }
8852 if (ctxt->instate == XML_PARSER_EOF)
8853 return(NULL);
8854 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8855 l = xmlDictLookup(ctxt->dict, tmp, -1);
8856 if (tmp != NULL) xmlFree(tmp);
8857 *prefix = p;
8858 return(l);
8859 }
8860 *prefix = p;
8861 } else
8862 *prefix = NULL;
8863 return(l);
8864 }
8865
8866 /**
8867 * xmlParseQNameAndCompare:
8868 * @ctxt: an XML parser context
8869 * @name: the localname
8870 * @prefix: the prefix, if any.
8871 *
8872 * parse an XML name and compares for match
8873 * (specialized for endtag parsing)
8874 *
8875 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8876 * and the name for mismatch
8877 */
8878
8879 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8880 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8881 xmlChar const *prefix) {
8882 const xmlChar *cmp;
8883 const xmlChar *in;
8884 const xmlChar *ret;
8885 const xmlChar *prefix2;
8886
8887 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8888
8889 GROW;
8890 in = ctxt->input->cur;
8891
8892 cmp = prefix;
8893 while (*in != 0 && *in == *cmp) {
8894 ++in;
8895 ++cmp;
8896 }
8897 if ((*cmp == 0) && (*in == ':')) {
8898 in++;
8899 cmp = name;
8900 while (*in != 0 && *in == *cmp) {
8901 ++in;
8902 ++cmp;
8903 }
8904 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8905 /* success */
8906 ctxt->input->col += in - ctxt->input->cur;
8907 ctxt->input->cur = in;
8908 return((const xmlChar*) 1);
8909 }
8910 }
8911 /*
8912 * all strings coms from the dictionary, equality can be done directly
8913 */
8914 ret = xmlParseQName (ctxt, &prefix2);
8915 if ((ret == name) && (prefix == prefix2))
8916 return((const xmlChar*) 1);
8917 return ret;
8918 }
8919
8920 /**
8921 * xmlParseAttValueInternal:
8922 * @ctxt: an XML parser context
8923 * @len: attribute len result
8924 * @alloc: whether the attribute was reallocated as a new string
8925 * @normalize: if 1 then further non-CDATA normalization must be done
8926 *
8927 * parse a value for an attribute.
8928 * NOTE: if no normalization is needed, the routine will return pointers
8929 * directly from the data buffer.
8930 *
8931 * 3.3.3 Attribute-Value Normalization:
8932 * Before the value of an attribute is passed to the application or
8933 * checked for validity, the XML processor must normalize it as follows:
8934 * - a character reference is processed by appending the referenced
8935 * character to the attribute value
8936 * - an entity reference is processed by recursively processing the
8937 * replacement text of the entity
8938 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8939 * appending #x20 to the normalized value, except that only a single
8940 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8941 * parsed entity or the literal entity value of an internal parsed entity
8942 * - other characters are processed by appending them to the normalized value
8943 * If the declared value is not CDATA, then the XML processor must further
8944 * process the normalized attribute value by discarding any leading and
8945 * trailing space (#x20) characters, and by replacing sequences of space
8946 * (#x20) characters by a single space (#x20) character.
8947 * All attributes for which no declaration has been read should be treated
8948 * by a non-validating parser as if declared CDATA.
8949 *
8950 * Returns the AttValue parsed or NULL. The value has to be freed by the
8951 * caller if it was copied, this can be detected by val[*len] == 0.
8952 */
8953
8954 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8955 const xmlChar *oldbase = ctxt->input->base;\
8956 GROW;\
8957 if (ctxt->instate == XML_PARSER_EOF)\
8958 return(NULL);\
8959 if (oldbase != ctxt->input->base) {\
8960 ptrdiff_t delta = ctxt->input->base - oldbase;\
8961 start = start + delta;\
8962 in = in + delta;\
8963 }\
8964 end = ctxt->input->end;
8965
8966 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8967 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8968 int normalize)
8969 {
8970 xmlChar limit = 0;
8971 const xmlChar *in = NULL, *start, *end, *last;
8972 xmlChar *ret = NULL;
8973 int line, col;
8974 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8975 XML_MAX_HUGE_LENGTH :
8976 XML_MAX_TEXT_LENGTH;
8977
8978 GROW;
8979 in = (xmlChar *) CUR_PTR;
8980 line = ctxt->input->line;
8981 col = ctxt->input->col;
8982 if (*in != '"' && *in != '\'') {
8983 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8984 return (NULL);
8985 }
8986 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8987
8988 /*
8989 * try to handle in this routine the most common case where no
8990 * allocation of a new string is required and where content is
8991 * pure ASCII.
8992 */
8993 limit = *in++;
8994 col++;
8995 end = ctxt->input->end;
8996 start = in;
8997 if (in >= end) {
8998 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8999 }
9000 if (normalize) {
9001 /*
9002 * Skip any leading spaces
9003 */
9004 while ((in < end) && (*in != limit) &&
9005 ((*in == 0x20) || (*in == 0x9) ||
9006 (*in == 0xA) || (*in == 0xD))) {
9007 if (*in == 0xA) {
9008 line++; col = 1;
9009 } else {
9010 col++;
9011 }
9012 in++;
9013 start = in;
9014 if (in >= end) {
9015 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9016 if ((in - start) > maxLength) {
9017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9018 "AttValue length too long\n");
9019 return(NULL);
9020 }
9021 }
9022 }
9023 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9025 col++;
9026 if ((*in++ == 0x20) && (*in == 0x20)) break;
9027 if (in >= end) {
9028 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9029 if ((in - start) > maxLength) {
9030 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9031 "AttValue length too long\n");
9032 return(NULL);
9033 }
9034 }
9035 }
9036 last = in;
9037 /*
9038 * skip the trailing blanks
9039 */
9040 while ((last[-1] == 0x20) && (last > start)) last--;
9041 while ((in < end) && (*in != limit) &&
9042 ((*in == 0x20) || (*in == 0x9) ||
9043 (*in == 0xA) || (*in == 0xD))) {
9044 if (*in == 0xA) {
9045 line++, col = 1;
9046 } else {
9047 col++;
9048 }
9049 in++;
9050 if (in >= end) {
9051 const xmlChar *oldbase = ctxt->input->base;
9052 GROW;
9053 if (ctxt->instate == XML_PARSER_EOF)
9054 return(NULL);
9055 if (oldbase != ctxt->input->base) {
9056 ptrdiff_t delta = ctxt->input->base - oldbase;
9057 start = start + delta;
9058 in = in + delta;
9059 last = last + delta;
9060 }
9061 end = ctxt->input->end;
9062 if ((in - start) > maxLength) {
9063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064 "AttValue length too long\n");
9065 return(NULL);
9066 }
9067 }
9068 }
9069 if ((in - start) > maxLength) {
9070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9071 "AttValue length too long\n");
9072 return(NULL);
9073 }
9074 if (*in != limit) goto need_complex;
9075 } else {
9076 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9077 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9078 in++;
9079 col++;
9080 if (in >= end) {
9081 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9082 if ((in - start) > maxLength) {
9083 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9084 "AttValue length too long\n");
9085 return(NULL);
9086 }
9087 }
9088 }
9089 last = in;
9090 if ((in - start) > maxLength) {
9091 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9092 "AttValue length too long\n");
9093 return(NULL);
9094 }
9095 if (*in != limit) goto need_complex;
9096 }
9097 in++;
9098 col++;
9099 if (len != NULL) {
9100 *len = last - start;
9101 ret = (xmlChar *) start;
9102 } else {
9103 if (alloc) *alloc = 1;
9104 ret = xmlStrndup(start, last - start);
9105 }
9106 CUR_PTR = in;
9107 ctxt->input->line = line;
9108 ctxt->input->col = col;
9109 if (alloc) *alloc = 0;
9110 return ret;
9111 need_complex:
9112 if (alloc) *alloc = 1;
9113 return xmlParseAttValueComplex(ctxt, len, normalize);
9114 }
9115
9116 /**
9117 * xmlParseAttribute2:
9118 * @ctxt: an XML parser context
9119 * @pref: the element prefix
9120 * @elem: the element name
9121 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9122 * @value: a xmlChar ** used to store the value of the attribute
9123 * @len: an int * to save the length of the attribute
9124 * @alloc: an int * to indicate if the attribute was allocated
9125 *
9126 * parse an attribute in the new SAX2 framework.
9127 *
9128 * Returns the attribute name, and the value in *value, .
9129 */
9130
9131 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9132 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9133 const xmlChar * pref, const xmlChar * elem,
9134 const xmlChar ** prefix, xmlChar ** value,
9135 int *len, int *alloc)
9136 {
9137 const xmlChar *name;
9138 xmlChar *val, *internal_val = NULL;
9139 int normalize = 0;
9140
9141 *value = NULL;
9142 GROW;
9143 name = xmlParseQName(ctxt, prefix);
9144 if (name == NULL) {
9145 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9146 "error parsing attribute name\n");
9147 return (NULL);
9148 }
9149
9150 /*
9151 * get the type if needed
9152 */
9153 if (ctxt->attsSpecial != NULL) {
9154 int type;
9155
9156 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9157 pref, elem, *prefix, name);
9158 if (type != 0)
9159 normalize = 1;
9160 }
9161
9162 /*
9163 * read the value
9164 */
9165 SKIP_BLANKS;
9166 if (RAW == '=') {
9167 NEXT;
9168 SKIP_BLANKS;
9169 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9170 if (normalize) {
9171 /*
9172 * Sometimes a second normalisation pass for spaces is needed
9173 * but that only happens if charrefs or entities references
9174 * have been used in the attribute value, i.e. the attribute
9175 * value have been extracted in an allocated string already.
9176 */
9177 if (*alloc) {
9178 const xmlChar *val2;
9179
9180 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9181 if ((val2 != NULL) && (val2 != val)) {
9182 xmlFree(val);
9183 val = (xmlChar *) val2;
9184 }
9185 }
9186 }
9187 ctxt->instate = XML_PARSER_CONTENT;
9188 } else {
9189 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9190 "Specification mandates value for attribute %s\n",
9191 name);
9192 return (NULL);
9193 }
9194
9195 if (*prefix == ctxt->str_xml) {
9196 /*
9197 * Check that xml:lang conforms to the specification
9198 * No more registered as an error, just generate a warning now
9199 * since this was deprecated in XML second edition
9200 */
9201 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9202 internal_val = xmlStrndup(val, *len);
9203 if (!xmlCheckLanguageID(internal_val)) {
9204 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9205 "Malformed value for xml:lang : %s\n",
9206 internal_val, NULL);
9207 }
9208 }
9209
9210 /*
9211 * Check that xml:space conforms to the specification
9212 */
9213 if (xmlStrEqual(name, BAD_CAST "space")) {
9214 internal_val = xmlStrndup(val, *len);
9215 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9216 *(ctxt->space) = 0;
9217 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9218 *(ctxt->space) = 1;
9219 else {
9220 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9221 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9222 internal_val, NULL);
9223 }
9224 }
9225 if (internal_val) {
9226 xmlFree(internal_val);
9227 }
9228 }
9229
9230 *value = val;
9231 return (name);
9232 }
9233 /**
9234 * xmlParseStartTag2:
9235 * @ctxt: an XML parser context
9236 *
9237 * parse a start of tag either for rule element or
9238 * EmptyElement. In both case we don't parse the tag closing chars.
9239 * This routine is called when running SAX2 parsing
9240 *
9241 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9242 *
9243 * [ WFC: Unique Att Spec ]
9244 * No attribute name may appear more than once in the same start-tag or
9245 * empty-element tag.
9246 *
9247 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9248 *
9249 * [ WFC: Unique Att Spec ]
9250 * No attribute name may appear more than once in the same start-tag or
9251 * empty-element tag.
9252 *
9253 * With namespace:
9254 *
9255 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9256 *
9257 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9258 *
9259 * Returns the element name parsed
9260 */
9261
9262 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9263 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9264 const xmlChar **URI, int *tlen) {
9265 const xmlChar *localname;
9266 const xmlChar *prefix;
9267 const xmlChar *attname;
9268 const xmlChar *aprefix;
9269 const xmlChar *nsname;
9270 xmlChar *attvalue;
9271 const xmlChar **atts = ctxt->atts;
9272 int maxatts = ctxt->maxatts;
9273 int nratts, nbatts, nbdef, inputid;
9274 int i, j, nbNs, attval;
9275 unsigned long cur;
9276 int nsNr = ctxt->nsNr;
9277
9278 if (RAW != '<') return(NULL);
9279 NEXT1;
9280
9281 /*
9282 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9283 * point since the attribute values may be stored as pointers to
9284 * the buffer and calling SHRINK would destroy them !
9285 * The Shrinking is only possible once the full set of attribute
9286 * callbacks have been done.
9287 */
9288 SHRINK;
9289 cur = ctxt->input->cur - ctxt->input->base;
9290 inputid = ctxt->input->id;
9291 nbatts = 0;
9292 nratts = 0;
9293 nbdef = 0;
9294 nbNs = 0;
9295 attval = 0;
9296 /* Forget any namespaces added during an earlier parse of this element. */
9297 ctxt->nsNr = nsNr;
9298
9299 localname = xmlParseQName(ctxt, &prefix);
9300 if (localname == NULL) {
9301 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9302 "StartTag: invalid element name\n");
9303 return(NULL);
9304 }
9305 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9306
9307 /*
9308 * Now parse the attributes, it ends up with the ending
9309 *
9310 * (S Attribute)* S?
9311 */
9312 SKIP_BLANKS;
9313 GROW;
9314
9315 while (((RAW != '>') &&
9316 ((RAW != '/') || (NXT(1) != '>')) &&
9317 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9318 const xmlChar *q = CUR_PTR;
9319 unsigned int cons = ctxt->input->consumed;
9320 int len = -1, alloc = 0;
9321
9322 attname = xmlParseAttribute2(ctxt, prefix, localname,
9323 &aprefix, &attvalue, &len, &alloc);
9324 if ((attname == NULL) || (attvalue == NULL))
9325 goto next_attr;
9326 if (len < 0) len = xmlStrlen(attvalue);
9327
9328 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9329 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9330 xmlURIPtr uri;
9331
9332 if (URL == NULL) {
9333 xmlErrMemory(ctxt, "dictionary allocation failure");
9334 if ((attvalue != NULL) && (alloc != 0))
9335 xmlFree(attvalue);
9336 localname = NULL;
9337 goto done;
9338 }
9339 if (*URL != 0) {
9340 uri = xmlParseURI((const char *) URL);
9341 if (uri == NULL) {
9342 xmlNsErr(ctxt, XML_WAR_NS_URI,
9343 "xmlns: '%s' is not a valid URI\n",
9344 URL, NULL, NULL);
9345 } else {
9346 if (uri->scheme == NULL) {
9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348 "xmlns: URI %s is not absolute\n",
9349 URL, NULL, NULL);
9350 }
9351 xmlFreeURI(uri);
9352 }
9353 if (URL == ctxt->str_xml_ns) {
9354 if (attname != ctxt->str_xml) {
9355 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356 "xml namespace URI cannot be the default namespace\n",
9357 NULL, NULL, NULL);
9358 }
9359 goto next_attr;
9360 }
9361 if ((len == 29) &&
9362 (xmlStrEqual(URL,
9363 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9364 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9365 "reuse of the xmlns namespace name is forbidden\n",
9366 NULL, NULL, NULL);
9367 goto next_attr;
9368 }
9369 }
9370 /*
9371 * check that it's not a defined namespace
9372 */
9373 for (j = 1;j <= nbNs;j++)
9374 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9375 break;
9376 if (j <= nbNs)
9377 xmlErrAttributeDup(ctxt, NULL, attname);
9378 else
9379 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9380
9381 } else if (aprefix == ctxt->str_xmlns) {
9382 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9383 xmlURIPtr uri;
9384
9385 if (attname == ctxt->str_xml) {
9386 if (URL != ctxt->str_xml_ns) {
9387 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9388 "xml namespace prefix mapped to wrong URI\n",
9389 NULL, NULL, NULL);
9390 }
9391 /*
9392 * Do not keep a namespace definition node
9393 */
9394 goto next_attr;
9395 }
9396 if (URL == ctxt->str_xml_ns) {
9397 if (attname != ctxt->str_xml) {
9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "xml namespace URI mapped to wrong prefix\n",
9400 NULL, NULL, NULL);
9401 }
9402 goto next_attr;
9403 }
9404 if (attname == ctxt->str_xmlns) {
9405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406 "redefinition of the xmlns prefix is forbidden\n",
9407 NULL, NULL, NULL);
9408 goto next_attr;
9409 }
9410 if ((len == 29) &&
9411 (xmlStrEqual(URL,
9412 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9413 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9414 "reuse of the xmlns namespace name is forbidden\n",
9415 NULL, NULL, NULL);
9416 goto next_attr;
9417 }
9418 if ((URL == NULL) || (URL[0] == 0)) {
9419 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9420 "xmlns:%s: Empty XML namespace is not allowed\n",
9421 attname, NULL, NULL);
9422 goto next_attr;
9423 } else {
9424 uri = xmlParseURI((const char *) URL);
9425 if (uri == NULL) {
9426 xmlNsErr(ctxt, XML_WAR_NS_URI,
9427 "xmlns:%s: '%s' is not a valid URI\n",
9428 attname, URL, NULL);
9429 } else {
9430 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9431 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9432 "xmlns:%s: URI %s is not absolute\n",
9433 attname, URL, NULL);
9434 }
9435 xmlFreeURI(uri);
9436 }
9437 }
9438
9439 /*
9440 * check that it's not a defined namespace
9441 */
9442 for (j = 1;j <= nbNs;j++)
9443 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9444 break;
9445 if (j <= nbNs)
9446 xmlErrAttributeDup(ctxt, aprefix, attname);
9447 else
9448 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9449
9450 } else {
9451 /*
9452 * Add the pair to atts
9453 */
9454 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9455 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9456 goto next_attr;
9457 }
9458 maxatts = ctxt->maxatts;
9459 atts = ctxt->atts;
9460 }
9461 ctxt->attallocs[nratts++] = alloc;
9462 atts[nbatts++] = attname;
9463 atts[nbatts++] = aprefix;
9464 /*
9465 * The namespace URI field is used temporarily to point at the
9466 * base of the current input buffer for non-alloced attributes.
9467 * When the input buffer is reallocated, all the pointers become
9468 * invalid, but they can be reconstructed later.
9469 */
9470 if (alloc)
9471 atts[nbatts++] = NULL;
9472 else
9473 atts[nbatts++] = ctxt->input->base;
9474 atts[nbatts++] = attvalue;
9475 attvalue += len;
9476 atts[nbatts++] = attvalue;
9477 /*
9478 * tag if some deallocation is needed
9479 */
9480 if (alloc != 0) attval = 1;
9481 attvalue = NULL; /* moved into atts */
9482 }
9483
9484 next_attr:
9485 if ((attvalue != NULL) && (alloc != 0)) {
9486 xmlFree(attvalue);
9487 attvalue = NULL;
9488 }
9489
9490 GROW
9491 if (ctxt->instate == XML_PARSER_EOF)
9492 break;
9493 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9494 break;
9495 if (SKIP_BLANKS == 0) {
9496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9497 "attributes construct error\n");
9498 break;
9499 }
9500 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9501 (attname == NULL) && (attvalue == NULL)) {
9502 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9503 "xmlParseStartTag: problem parsing attributes\n");
9504 break;
9505 }
9506 GROW;
9507 }
9508
9509 if (ctxt->input->id != inputid) {
9510 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9511 "Unexpected change of input\n");
9512 localname = NULL;
9513 goto done;
9514 }
9515
9516 /* Reconstruct attribute value pointers. */
9517 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9518 if (atts[i+2] != NULL) {
9519 /*
9520 * Arithmetic on dangling pointers is technically undefined
9521 * behavior, but well...
9522 */
9523 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9524 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9525 atts[i+3] += offset; /* value */
9526 atts[i+4] += offset; /* valuend */
9527 }
9528 }
9529
9530 /*
9531 * The attributes defaulting
9532 */
9533 if (ctxt->attsDefault != NULL) {
9534 xmlDefAttrsPtr defaults;
9535
9536 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9537 if (defaults != NULL) {
9538 for (i = 0;i < defaults->nbAttrs;i++) {
9539 attname = defaults->values[5 * i];
9540 aprefix = defaults->values[5 * i + 1];
9541
9542 /*
9543 * special work for namespaces defaulted defs
9544 */
9545 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9546 /*
9547 * check that it's not a defined namespace
9548 */
9549 for (j = 1;j <= nbNs;j++)
9550 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9551 break;
9552 if (j <= nbNs) continue;
9553
9554 nsname = xmlGetNamespace(ctxt, NULL);
9555 if (nsname != defaults->values[5 * i + 2]) {
9556 if (nsPush(ctxt, NULL,
9557 defaults->values[5 * i + 2]) > 0)
9558 nbNs++;
9559 }
9560 } else if (aprefix == ctxt->str_xmlns) {
9561 /*
9562 * check that it's not a defined namespace
9563 */
9564 for (j = 1;j <= nbNs;j++)
9565 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9566 break;
9567 if (j <= nbNs) continue;
9568
9569 nsname = xmlGetNamespace(ctxt, attname);
9570 if (nsname != defaults->values[2]) {
9571 if (nsPush(ctxt, attname,
9572 defaults->values[5 * i + 2]) > 0)
9573 nbNs++;
9574 }
9575 } else {
9576 /*
9577 * check that it's not a defined attribute
9578 */
9579 for (j = 0;j < nbatts;j+=5) {
9580 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9581 break;
9582 }
9583 if (j < nbatts) continue;
9584
9585 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9586 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9587 localname = NULL;
9588 goto done;
9589 }
9590 maxatts = ctxt->maxatts;
9591 atts = ctxt->atts;
9592 }
9593 atts[nbatts++] = attname;
9594 atts[nbatts++] = aprefix;
9595 if (aprefix == NULL)
9596 atts[nbatts++] = NULL;
9597 else
9598 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9599 atts[nbatts++] = defaults->values[5 * i + 2];
9600 atts[nbatts++] = defaults->values[5 * i + 3];
9601 if ((ctxt->standalone == 1) &&
9602 (defaults->values[5 * i + 4] != NULL)) {
9603 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9604 "standalone: attribute %s on %s defaulted from external subset\n",
9605 attname, localname);
9606 }
9607 nbdef++;
9608 }
9609 }
9610 }
9611 }
9612
9613 /*
9614 * The attributes checkings
9615 */
9616 for (i = 0; i < nbatts;i += 5) {
9617 /*
9618 * The default namespace does not apply to attribute names.
9619 */
9620 if (atts[i + 1] != NULL) {
9621 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9622 if (nsname == NULL) {
9623 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9624 "Namespace prefix %s for %s on %s is not defined\n",
9625 atts[i + 1], atts[i], localname);
9626 }
9627 atts[i + 2] = nsname;
9628 } else
9629 nsname = NULL;
9630 /*
9631 * [ WFC: Unique Att Spec ]
9632 * No attribute name may appear more than once in the same
9633 * start-tag or empty-element tag.
9634 * As extended by the Namespace in XML REC.
9635 */
9636 for (j = 0; j < i;j += 5) {
9637 if (atts[i] == atts[j]) {
9638 if (atts[i+1] == atts[j+1]) {
9639 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9640 break;
9641 }
9642 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9643 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9644 "Namespaced Attribute %s in '%s' redefined\n",
9645 atts[i], nsname, NULL);
9646 break;
9647 }
9648 }
9649 }
9650 }
9651
9652 nsname = xmlGetNamespace(ctxt, prefix);
9653 if ((prefix != NULL) && (nsname == NULL)) {
9654 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9655 "Namespace prefix %s on %s is not defined\n",
9656 prefix, localname, NULL);
9657 }
9658 *pref = prefix;
9659 *URI = nsname;
9660
9661 /*
9662 * SAX: Start of Element !
9663 */
9664 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9665 (!ctxt->disableSAX)) {
9666 if (nbNs > 0)
9667 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9668 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9669 nbatts / 5, nbdef, atts);
9670 else
9671 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9672 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9673 }
9674
9675 done:
9676 /*
9677 * Free up attribute allocated strings if needed
9678 */
9679 if (attval != 0) {
9680 for (i = 3,j = 0; j < nratts;i += 5,j++)
9681 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9682 xmlFree((xmlChar *) atts[i]);
9683 }
9684
9685 return(localname);
9686 }
9687
9688 /**
9689 * xmlParseEndTag2:
9690 * @ctxt: an XML parser context
9691 * @line: line of the start tag
9692 * @nsNr: number of namespaces on the start tag
9693 *
9694 * parse an end of tag
9695 *
9696 * [42] ETag ::= '</' Name S? '>'
9697 *
9698 * With namespace
9699 *
9700 * [NS 9] ETag ::= '</' QName S? '>'
9701 */
9702
9703 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9704 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9705 const xmlChar *name;
9706
9707 GROW;
9708 if ((RAW != '<') || (NXT(1) != '/')) {
9709 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9710 return;
9711 }
9712 SKIP(2);
9713
9714 if (tag->prefix == NULL)
9715 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9716 else
9717 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9718
9719 /*
9720 * We should definitely be at the ending "S? '>'" part
9721 */
9722 GROW;
9723 if (ctxt->instate == XML_PARSER_EOF)
9724 return;
9725 SKIP_BLANKS;
9726 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9727 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9728 } else
9729 NEXT1;
9730
9731 /*
9732 * [ WFC: Element Type Match ]
9733 * The Name in an element's end-tag must match the element type in the
9734 * start-tag.
9735 *
9736 */
9737 if (name != (xmlChar*)1) {
9738 if (name == NULL) name = BAD_CAST "unparsable";
9739 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9740 "Opening and ending tag mismatch: %s line %d and %s\n",
9741 ctxt->name, tag->line, name);
9742 }
9743
9744 /*
9745 * SAX: End of Tag
9746 */
9747 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9748 (!ctxt->disableSAX))
9749 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9750 tag->URI);
9751
9752 spacePop(ctxt);
9753 if (tag->nsNr != 0)
9754 nsPop(ctxt, tag->nsNr);
9755 }
9756
9757 /**
9758 * xmlParseCDSect:
9759 * @ctxt: an XML parser context
9760 *
9761 * Parse escaped pure raw content.
9762 *
9763 * [18] CDSect ::= CDStart CData CDEnd
9764 *
9765 * [19] CDStart ::= '<![CDATA['
9766 *
9767 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9768 *
9769 * [21] CDEnd ::= ']]>'
9770 */
9771 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9772 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9773 xmlChar *buf = NULL;
9774 int len = 0;
9775 int size = XML_PARSER_BUFFER_SIZE;
9776 int r, rl;
9777 int s, sl;
9778 int cur, l;
9779 int count = 0;
9780 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9781 XML_MAX_HUGE_LENGTH :
9782 XML_MAX_TEXT_LENGTH;
9783
9784 /* Check 2.6.0 was NXT(0) not RAW */
9785 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9786 SKIP(9);
9787 } else
9788 return;
9789
9790 ctxt->instate = XML_PARSER_CDATA_SECTION;
9791 r = CUR_CHAR(rl);
9792 if (!IS_CHAR(r)) {
9793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794 ctxt->instate = XML_PARSER_CONTENT;
9795 return;
9796 }
9797 NEXTL(rl);
9798 s = CUR_CHAR(sl);
9799 if (!IS_CHAR(s)) {
9800 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9801 ctxt->instate = XML_PARSER_CONTENT;
9802 return;
9803 }
9804 NEXTL(sl);
9805 cur = CUR_CHAR(l);
9806 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9807 if (buf == NULL) {
9808 xmlErrMemory(ctxt, NULL);
9809 return;
9810 }
9811 while (IS_CHAR(cur) &&
9812 ((r != ']') || (s != ']') || (cur != '>'))) {
9813 if (len + 5 >= size) {
9814 xmlChar *tmp;
9815
9816 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9817 if (tmp == NULL) {
9818 xmlFree(buf);
9819 xmlErrMemory(ctxt, NULL);
9820 return;
9821 }
9822 buf = tmp;
9823 size *= 2;
9824 }
9825 COPY_BUF(rl,buf,len,r);
9826 r = s;
9827 rl = sl;
9828 s = cur;
9829 sl = l;
9830 count++;
9831 if (count > 50) {
9832 SHRINK;
9833 GROW;
9834 if (ctxt->instate == XML_PARSER_EOF) {
9835 xmlFree(buf);
9836 return;
9837 }
9838 count = 0;
9839 }
9840 NEXTL(l);
9841 cur = CUR_CHAR(l);
9842 if (len > maxLength) {
9843 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9844 "CData section too big found\n");
9845 xmlFree(buf);
9846 return;
9847 }
9848 }
9849 buf[len] = 0;
9850 ctxt->instate = XML_PARSER_CONTENT;
9851 if (cur != '>') {
9852 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9853 "CData section not finished\n%.50s\n", buf);
9854 xmlFree(buf);
9855 return;
9856 }
9857 NEXTL(l);
9858
9859 /*
9860 * OK the buffer is to be consumed as cdata.
9861 */
9862 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9863 if (ctxt->sax->cdataBlock != NULL)
9864 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9865 else if (ctxt->sax->characters != NULL)
9866 ctxt->sax->characters(ctxt->userData, buf, len);
9867 }
9868 xmlFree(buf);
9869 }
9870
9871 /**
9872 * xmlParseContentInternal:
9873 * @ctxt: an XML parser context
9874 *
9875 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9876 * unexpected EOF to the caller.
9877 */
9878
9879 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9880 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9881 int nameNr = ctxt->nameNr;
9882
9883 GROW;
9884 while ((RAW != 0) &&
9885 (ctxt->instate != XML_PARSER_EOF)) {
9886 const xmlChar *test = CUR_PTR;
9887 unsigned int cons = ctxt->input->consumed;
9888 const xmlChar *cur = ctxt->input->cur;
9889
9890 /*
9891 * First case : a Processing Instruction.
9892 */
9893 if ((*cur == '<') && (cur[1] == '?')) {
9894 xmlParsePI(ctxt);
9895 }
9896
9897 /*
9898 * Second case : a CDSection
9899 */
9900 /* 2.6.0 test was *cur not RAW */
9901 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9902 xmlParseCDSect(ctxt);
9903 }
9904
9905 /*
9906 * Third case : a comment
9907 */
9908 else if ((*cur == '<') && (NXT(1) == '!') &&
9909 (NXT(2) == '-') && (NXT(3) == '-')) {
9910 xmlParseComment(ctxt);
9911 ctxt->instate = XML_PARSER_CONTENT;
9912 }
9913
9914 /*
9915 * Fourth case : a sub-element.
9916 */
9917 else if (*cur == '<') {
9918 if (NXT(1) == '/') {
9919 if (ctxt->nameNr <= nameNr)
9920 break;
9921 xmlParseElementEnd(ctxt);
9922 } else {
9923 xmlParseElementStart(ctxt);
9924 }
9925 }
9926
9927 /*
9928 * Fifth case : a reference. If if has not been resolved,
9929 * parsing returns it's Name, create the node
9930 */
9931
9932 else if (*cur == '&') {
9933 xmlParseReference(ctxt);
9934 }
9935
9936 /*
9937 * Last case, text. Note that References are handled directly.
9938 */
9939 else {
9940 xmlParseCharData(ctxt, 0);
9941 }
9942
9943 GROW;
9944 SHRINK;
9945
9946 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9947 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9948 "detected an error in element content\n");
9949 xmlHaltParser(ctxt);
9950 break;
9951 }
9952 }
9953 }
9954
9955 /**
9956 * xmlParseContent:
9957 * @ctxt: an XML parser context
9958 *
9959 * Parse a content sequence. Stops at EOF or '</'.
9960 *
9961 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9962 */
9963
9964 void
xmlParseContent(xmlParserCtxtPtr ctxt)9965 xmlParseContent(xmlParserCtxtPtr ctxt) {
9966 int nameNr = ctxt->nameNr;
9967
9968 xmlParseContentInternal(ctxt);
9969
9970 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9971 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9972 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9973 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9974 "Premature end of data in tag %s line %d\n",
9975 name, line, NULL);
9976 }
9977 }
9978
9979 /**
9980 * xmlParseElement:
9981 * @ctxt: an XML parser context
9982 *
9983 * parse an XML element
9984 *
9985 * [39] element ::= EmptyElemTag | STag content ETag
9986 *
9987 * [ WFC: Element Type Match ]
9988 * The Name in an element's end-tag must match the element type in the
9989 * start-tag.
9990 *
9991 */
9992
9993 void
xmlParseElement(xmlParserCtxtPtr ctxt)9994 xmlParseElement(xmlParserCtxtPtr ctxt) {
9995 if (xmlParseElementStart(ctxt) != 0)
9996 return;
9997
9998 xmlParseContentInternal(ctxt);
9999 if (ctxt->instate == XML_PARSER_EOF)
10000 return;
10001
10002 if (CUR == 0) {
10003 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10004 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10005 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10006 "Premature end of data in tag %s line %d\n",
10007 name, line, NULL);
10008 return;
10009 }
10010
10011 xmlParseElementEnd(ctxt);
10012 }
10013
10014 /**
10015 * xmlParseElementStart:
10016 * @ctxt: an XML parser context
10017 *
10018 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10019 * opening tag was parsed, 1 if an empty element was parsed.
10020 */
10021 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10022 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10023 const xmlChar *name;
10024 const xmlChar *prefix = NULL;
10025 const xmlChar *URI = NULL;
10026 xmlParserNodeInfo node_info;
10027 int line, tlen = 0;
10028 xmlNodePtr ret;
10029 int nsNr = ctxt->nsNr;
10030
10031 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10032 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10033 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10034 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10035 xmlParserMaxDepth);
10036 xmlHaltParser(ctxt);
10037 return(-1);
10038 }
10039
10040 /* Capture start position */
10041 if (ctxt->record_info) {
10042 node_info.begin_pos = ctxt->input->consumed +
10043 (CUR_PTR - ctxt->input->base);
10044 node_info.begin_line = ctxt->input->line;
10045 }
10046
10047 if (ctxt->spaceNr == 0)
10048 spacePush(ctxt, -1);
10049 else if (*ctxt->space == -2)
10050 spacePush(ctxt, -1);
10051 else
10052 spacePush(ctxt, *ctxt->space);
10053
10054 line = ctxt->input->line;
10055 #ifdef LIBXML_SAX1_ENABLED
10056 if (ctxt->sax2)
10057 #endif /* LIBXML_SAX1_ENABLED */
10058 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10059 #ifdef LIBXML_SAX1_ENABLED
10060 else
10061 name = xmlParseStartTag(ctxt);
10062 #endif /* LIBXML_SAX1_ENABLED */
10063 if (ctxt->instate == XML_PARSER_EOF)
10064 return(-1);
10065 if (name == NULL) {
10066 spacePop(ctxt);
10067 return(-1);
10068 }
10069 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10070 ret = ctxt->node;
10071
10072 #ifdef LIBXML_VALID_ENABLED
10073 /*
10074 * [ VC: Root Element Type ]
10075 * The Name in the document type declaration must match the element
10076 * type of the root element.
10077 */
10078 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10079 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10080 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10081 #endif /* LIBXML_VALID_ENABLED */
10082
10083 /*
10084 * Check for an Empty Element.
10085 */
10086 if ((RAW == '/') && (NXT(1) == '>')) {
10087 SKIP(2);
10088 if (ctxt->sax2) {
10089 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10090 (!ctxt->disableSAX))
10091 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10092 #ifdef LIBXML_SAX1_ENABLED
10093 } else {
10094 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10095 (!ctxt->disableSAX))
10096 ctxt->sax->endElement(ctxt->userData, name);
10097 #endif /* LIBXML_SAX1_ENABLED */
10098 }
10099 namePop(ctxt);
10100 spacePop(ctxt);
10101 if (nsNr != ctxt->nsNr)
10102 nsPop(ctxt, ctxt->nsNr - nsNr);
10103 if ( ret != NULL && ctxt->record_info ) {
10104 node_info.end_pos = ctxt->input->consumed +
10105 (CUR_PTR - ctxt->input->base);
10106 node_info.end_line = ctxt->input->line;
10107 node_info.node = ret;
10108 xmlParserAddNodeInfo(ctxt, &node_info);
10109 }
10110 return(1);
10111 }
10112 if (RAW == '>') {
10113 NEXT1;
10114 } else {
10115 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10116 "Couldn't find end of Start Tag %s line %d\n",
10117 name, line, NULL);
10118
10119 /*
10120 * end of parsing of this node.
10121 */
10122 nodePop(ctxt);
10123 namePop(ctxt);
10124 spacePop(ctxt);
10125 if (nsNr != ctxt->nsNr)
10126 nsPop(ctxt, ctxt->nsNr - nsNr);
10127
10128 /*
10129 * Capture end position and add node
10130 */
10131 if ( ret != NULL && ctxt->record_info ) {
10132 node_info.end_pos = ctxt->input->consumed +
10133 (CUR_PTR - ctxt->input->base);
10134 node_info.end_line = ctxt->input->line;
10135 node_info.node = ret;
10136 xmlParserAddNodeInfo(ctxt, &node_info);
10137 }
10138 return(-1);
10139 }
10140
10141 return(0);
10142 }
10143
10144 /**
10145 * xmlParseElementEnd:
10146 * @ctxt: an XML parser context
10147 *
10148 * Parse the end of an XML element.
10149 */
10150 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10151 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10152 xmlParserNodeInfo node_info;
10153 xmlNodePtr ret = ctxt->node;
10154
10155 if (ctxt->nameNr <= 0)
10156 return;
10157
10158 /*
10159 * parse the end of tag: '</' should be here.
10160 */
10161 if (ctxt->sax2) {
10162 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10163 namePop(ctxt);
10164 }
10165 #ifdef LIBXML_SAX1_ENABLED
10166 else
10167 xmlParseEndTag1(ctxt, 0);
10168 #endif /* LIBXML_SAX1_ENABLED */
10169
10170 /*
10171 * Capture end position and add node
10172 */
10173 if ( ret != NULL && ctxt->record_info ) {
10174 node_info.end_pos = ctxt->input->consumed +
10175 (CUR_PTR - ctxt->input->base);
10176 node_info.end_line = ctxt->input->line;
10177 node_info.node = ret;
10178 xmlParserAddNodeInfo(ctxt, &node_info);
10179 }
10180 }
10181
10182 /**
10183 * xmlParseVersionNum:
10184 * @ctxt: an XML parser context
10185 *
10186 * parse the XML version value.
10187 *
10188 * [26] VersionNum ::= '1.' [0-9]+
10189 *
10190 * In practice allow [0-9].[0-9]+ at that level
10191 *
10192 * Returns the string giving the XML version number, or NULL
10193 */
10194 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10195 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10196 xmlChar *buf = NULL;
10197 int len = 0;
10198 int size = 10;
10199 xmlChar cur;
10200
10201 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10202 if (buf == NULL) {
10203 xmlErrMemory(ctxt, NULL);
10204 return(NULL);
10205 }
10206 cur = CUR;
10207 if (!((cur >= '0') && (cur <= '9'))) {
10208 xmlFree(buf);
10209 return(NULL);
10210 }
10211 buf[len++] = cur;
10212 NEXT;
10213 cur=CUR;
10214 if (cur != '.') {
10215 xmlFree(buf);
10216 return(NULL);
10217 }
10218 buf[len++] = cur;
10219 NEXT;
10220 cur=CUR;
10221 while ((cur >= '0') && (cur <= '9')) {
10222 if (len + 1 >= size) {
10223 xmlChar *tmp;
10224
10225 size *= 2;
10226 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10227 if (tmp == NULL) {
10228 xmlFree(buf);
10229 xmlErrMemory(ctxt, NULL);
10230 return(NULL);
10231 }
10232 buf = tmp;
10233 }
10234 buf[len++] = cur;
10235 NEXT;
10236 cur=CUR;
10237 }
10238 buf[len] = 0;
10239 return(buf);
10240 }
10241
10242 /**
10243 * xmlParseVersionInfo:
10244 * @ctxt: an XML parser context
10245 *
10246 * parse the XML version.
10247 *
10248 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10249 *
10250 * [25] Eq ::= S? '=' S?
10251 *
10252 * Returns the version string, e.g. "1.0"
10253 */
10254
10255 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10256 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10257 xmlChar *version = NULL;
10258
10259 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10260 SKIP(7);
10261 SKIP_BLANKS;
10262 if (RAW != '=') {
10263 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10264 return(NULL);
10265 }
10266 NEXT;
10267 SKIP_BLANKS;
10268 if (RAW == '"') {
10269 NEXT;
10270 version = xmlParseVersionNum(ctxt);
10271 if (RAW != '"') {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273 } else
10274 NEXT;
10275 } else if (RAW == '\''){
10276 NEXT;
10277 version = xmlParseVersionNum(ctxt);
10278 if (RAW != '\'') {
10279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10280 } else
10281 NEXT;
10282 } else {
10283 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10284 }
10285 }
10286 return(version);
10287 }
10288
10289 /**
10290 * xmlParseEncName:
10291 * @ctxt: an XML parser context
10292 *
10293 * parse the XML encoding name
10294 *
10295 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10296 *
10297 * Returns the encoding name value or NULL
10298 */
10299 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10300 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10301 xmlChar *buf = NULL;
10302 int len = 0;
10303 int size = 10;
10304 xmlChar cur;
10305
10306 cur = CUR;
10307 if (((cur >= 'a') && (cur <= 'z')) ||
10308 ((cur >= 'A') && (cur <= 'Z'))) {
10309 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10310 if (buf == NULL) {
10311 xmlErrMemory(ctxt, NULL);
10312 return(NULL);
10313 }
10314
10315 buf[len++] = cur;
10316 NEXT;
10317 cur = CUR;
10318 while (((cur >= 'a') && (cur <= 'z')) ||
10319 ((cur >= 'A') && (cur <= 'Z')) ||
10320 ((cur >= '0') && (cur <= '9')) ||
10321 (cur == '.') || (cur == '_') ||
10322 (cur == '-')) {
10323 if (len + 1 >= size) {
10324 xmlChar *tmp;
10325
10326 size *= 2;
10327 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10328 if (tmp == NULL) {
10329 xmlErrMemory(ctxt, NULL);
10330 xmlFree(buf);
10331 return(NULL);
10332 }
10333 buf = tmp;
10334 }
10335 buf[len++] = cur;
10336 NEXT;
10337 cur = CUR;
10338 if (cur == 0) {
10339 SHRINK;
10340 GROW;
10341 cur = CUR;
10342 }
10343 }
10344 buf[len] = 0;
10345 } else {
10346 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10347 }
10348 return(buf);
10349 }
10350
10351 /**
10352 * xmlParseEncodingDecl:
10353 * @ctxt: an XML parser context
10354 *
10355 * parse the XML encoding declaration
10356 *
10357 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10358 *
10359 * this setups the conversion filters.
10360 *
10361 * Returns the encoding value or NULL
10362 */
10363
10364 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10365 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10366 xmlChar *encoding = NULL;
10367
10368 SKIP_BLANKS;
10369 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10370 SKIP(8);
10371 SKIP_BLANKS;
10372 if (RAW != '=') {
10373 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10374 return(NULL);
10375 }
10376 NEXT;
10377 SKIP_BLANKS;
10378 if (RAW == '"') {
10379 NEXT;
10380 encoding = xmlParseEncName(ctxt);
10381 if (RAW != '"') {
10382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10383 xmlFree((xmlChar *) encoding);
10384 return(NULL);
10385 } else
10386 NEXT;
10387 } else if (RAW == '\''){
10388 NEXT;
10389 encoding = xmlParseEncName(ctxt);
10390 if (RAW != '\'') {
10391 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10392 xmlFree((xmlChar *) encoding);
10393 return(NULL);
10394 } else
10395 NEXT;
10396 } else {
10397 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10398 }
10399
10400 /*
10401 * Non standard parsing, allowing the user to ignore encoding
10402 */
10403 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10404 xmlFree((xmlChar *) encoding);
10405 return(NULL);
10406 }
10407
10408 /*
10409 * UTF-16 encoding switch has already taken place at this stage,
10410 * more over the little-endian/big-endian selection is already done
10411 */
10412 if ((encoding != NULL) &&
10413 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10414 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10415 /*
10416 * If no encoding was passed to the parser, that we are
10417 * using UTF-16 and no decoder is present i.e. the
10418 * document is apparently UTF-8 compatible, then raise an
10419 * encoding mismatch fatal error
10420 */
10421 if ((ctxt->encoding == NULL) &&
10422 (ctxt->input->buf != NULL) &&
10423 (ctxt->input->buf->encoder == NULL)) {
10424 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10425 "Document labelled UTF-16 but has UTF-8 content\n");
10426 }
10427 if (ctxt->encoding != NULL)
10428 xmlFree((xmlChar *) ctxt->encoding);
10429 ctxt->encoding = encoding;
10430 }
10431 /*
10432 * UTF-8 encoding is handled natively
10433 */
10434 else if ((encoding != NULL) &&
10435 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10436 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10437 if (ctxt->encoding != NULL)
10438 xmlFree((xmlChar *) ctxt->encoding);
10439 ctxt->encoding = encoding;
10440 }
10441 else if (encoding != NULL) {
10442 xmlCharEncodingHandlerPtr handler;
10443
10444 if (ctxt->input->encoding != NULL)
10445 xmlFree((xmlChar *) ctxt->input->encoding);
10446 ctxt->input->encoding = encoding;
10447
10448 handler = xmlFindCharEncodingHandler((const char *) encoding);
10449 if (handler != NULL) {
10450 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10451 /* failed to convert */
10452 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10453 return(NULL);
10454 }
10455 } else {
10456 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10457 "Unsupported encoding %s\n", encoding);
10458 return(NULL);
10459 }
10460 }
10461 }
10462 return(encoding);
10463 }
10464
10465 /**
10466 * xmlParseSDDecl:
10467 * @ctxt: an XML parser context
10468 *
10469 * parse the XML standalone declaration
10470 *
10471 * [32] SDDecl ::= S 'standalone' Eq
10472 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10473 *
10474 * [ VC: Standalone Document Declaration ]
10475 * TODO The standalone document declaration must have the value "no"
10476 * if any external markup declarations contain declarations of:
10477 * - attributes with default values, if elements to which these
10478 * attributes apply appear in the document without specifications
10479 * of values for these attributes, or
10480 * - entities (other than amp, lt, gt, apos, quot), if references
10481 * to those entities appear in the document, or
10482 * - attributes with values subject to normalization, where the
10483 * attribute appears in the document with a value which will change
10484 * as a result of normalization, or
10485 * - element types with element content, if white space occurs directly
10486 * within any instance of those types.
10487 *
10488 * Returns:
10489 * 1 if standalone="yes"
10490 * 0 if standalone="no"
10491 * -2 if standalone attribute is missing or invalid
10492 * (A standalone value of -2 means that the XML declaration was found,
10493 * but no value was specified for the standalone attribute).
10494 */
10495
10496 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10497 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10498 int standalone = -2;
10499
10500 SKIP_BLANKS;
10501 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10502 SKIP(10);
10503 SKIP_BLANKS;
10504 if (RAW != '=') {
10505 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10506 return(standalone);
10507 }
10508 NEXT;
10509 SKIP_BLANKS;
10510 if (RAW == '\''){
10511 NEXT;
10512 if ((RAW == 'n') && (NXT(1) == 'o')) {
10513 standalone = 0;
10514 SKIP(2);
10515 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516 (NXT(2) == 's')) {
10517 standalone = 1;
10518 SKIP(3);
10519 } else {
10520 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10521 }
10522 if (RAW != '\'') {
10523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524 } else
10525 NEXT;
10526 } else if (RAW == '"'){
10527 NEXT;
10528 if ((RAW == 'n') && (NXT(1) == 'o')) {
10529 standalone = 0;
10530 SKIP(2);
10531 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10532 (NXT(2) == 's')) {
10533 standalone = 1;
10534 SKIP(3);
10535 } else {
10536 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10537 }
10538 if (RAW != '"') {
10539 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10540 } else
10541 NEXT;
10542 } else {
10543 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10544 }
10545 }
10546 return(standalone);
10547 }
10548
10549 /**
10550 * xmlParseXMLDecl:
10551 * @ctxt: an XML parser context
10552 *
10553 * parse an XML declaration header
10554 *
10555 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10556 */
10557
10558 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10559 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10560 xmlChar *version;
10561
10562 /*
10563 * This value for standalone indicates that the document has an
10564 * XML declaration but it does not have a standalone attribute.
10565 * It will be overwritten later if a standalone attribute is found.
10566 */
10567 ctxt->input->standalone = -2;
10568
10569 /*
10570 * We know that '<?xml' is here.
10571 */
10572 SKIP(5);
10573
10574 if (!IS_BLANK_CH(RAW)) {
10575 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10576 "Blank needed after '<?xml'\n");
10577 }
10578 SKIP_BLANKS;
10579
10580 /*
10581 * We must have the VersionInfo here.
10582 */
10583 version = xmlParseVersionInfo(ctxt);
10584 if (version == NULL) {
10585 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10586 } else {
10587 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10588 /*
10589 * Changed here for XML-1.0 5th edition
10590 */
10591 if (ctxt->options & XML_PARSE_OLD10) {
10592 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10593 "Unsupported version '%s'\n",
10594 version);
10595 } else {
10596 if ((version[0] == '1') && ((version[1] == '.'))) {
10597 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10598 "Unsupported version '%s'\n",
10599 version, NULL);
10600 } else {
10601 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10602 "Unsupported version '%s'\n",
10603 version);
10604 }
10605 }
10606 }
10607 if (ctxt->version != NULL)
10608 xmlFree((void *) ctxt->version);
10609 ctxt->version = version;
10610 }
10611
10612 /*
10613 * We may have the encoding declaration
10614 */
10615 if (!IS_BLANK_CH(RAW)) {
10616 if ((RAW == '?') && (NXT(1) == '>')) {
10617 SKIP(2);
10618 return;
10619 }
10620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10621 }
10622 xmlParseEncodingDecl(ctxt);
10623 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10624 (ctxt->instate == XML_PARSER_EOF)) {
10625 /*
10626 * The XML REC instructs us to stop parsing right here
10627 */
10628 return;
10629 }
10630
10631 /*
10632 * We may have the standalone status.
10633 */
10634 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10635 if ((RAW == '?') && (NXT(1) == '>')) {
10636 SKIP(2);
10637 return;
10638 }
10639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10640 }
10641
10642 /*
10643 * We can grow the input buffer freely at that point
10644 */
10645 GROW;
10646
10647 SKIP_BLANKS;
10648 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10649
10650 SKIP_BLANKS;
10651 if ((RAW == '?') && (NXT(1) == '>')) {
10652 SKIP(2);
10653 } else if (RAW == '>') {
10654 /* Deprecated old WD ... */
10655 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10656 NEXT;
10657 } else {
10658 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10659 MOVETO_ENDTAG(CUR_PTR);
10660 NEXT;
10661 }
10662 }
10663
10664 /**
10665 * xmlParseMisc:
10666 * @ctxt: an XML parser context
10667 *
10668 * parse an XML Misc* optional field.
10669 *
10670 * [27] Misc ::= Comment | PI | S
10671 */
10672
10673 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10674 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10675 while (ctxt->instate != XML_PARSER_EOF) {
10676 SKIP_BLANKS;
10677 GROW;
10678 if ((RAW == '<') && (NXT(1) == '?')) {
10679 xmlParsePI(ctxt);
10680 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10681 xmlParseComment(ctxt);
10682 } else {
10683 break;
10684 }
10685 }
10686 }
10687
10688 /**
10689 * xmlParseDocument:
10690 * @ctxt: an XML parser context
10691 *
10692 * parse an XML document (and build a tree if using the standard SAX
10693 * interface).
10694 *
10695 * [1] document ::= prolog element Misc*
10696 *
10697 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10698 *
10699 * Returns 0, -1 in case of error. the parser context is augmented
10700 * as a result of the parsing.
10701 */
10702
10703 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10704 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10705 xmlChar start[4];
10706 xmlCharEncoding enc;
10707
10708 xmlInitParser();
10709
10710 if ((ctxt == NULL) || (ctxt->input == NULL))
10711 return(-1);
10712
10713 GROW;
10714
10715 /*
10716 * SAX: detecting the level.
10717 */
10718 xmlDetectSAX2(ctxt);
10719
10720 /*
10721 * SAX: beginning of the document processing.
10722 */
10723 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10724 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10725 if (ctxt->instate == XML_PARSER_EOF)
10726 return(-1);
10727
10728 if ((ctxt->encoding == NULL) &&
10729 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10730 /*
10731 * Get the 4 first bytes and decode the charset
10732 * if enc != XML_CHAR_ENCODING_NONE
10733 * plug some encoding conversion routines.
10734 */
10735 start[0] = RAW;
10736 start[1] = NXT(1);
10737 start[2] = NXT(2);
10738 start[3] = NXT(3);
10739 enc = xmlDetectCharEncoding(&start[0], 4);
10740 if (enc != XML_CHAR_ENCODING_NONE) {
10741 xmlSwitchEncoding(ctxt, enc);
10742 }
10743 }
10744
10745
10746 if (CUR == 0) {
10747 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10748 return(-1);
10749 }
10750
10751 /*
10752 * Check for the XMLDecl in the Prolog.
10753 * do not GROW here to avoid the detected encoder to decode more
10754 * than just the first line, unless the amount of data is really
10755 * too small to hold "<?xml version="1.0" encoding="foo"
10756 */
10757 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10758 GROW;
10759 }
10760 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10761
10762 /*
10763 * Note that we will switch encoding on the fly.
10764 */
10765 xmlParseXMLDecl(ctxt);
10766 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10767 (ctxt->instate == XML_PARSER_EOF)) {
10768 /*
10769 * The XML REC instructs us to stop parsing right here
10770 */
10771 return(-1);
10772 }
10773 ctxt->standalone = ctxt->input->standalone;
10774 SKIP_BLANKS;
10775 } else {
10776 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10777 }
10778 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10779 ctxt->sax->startDocument(ctxt->userData);
10780 if (ctxt->instate == XML_PARSER_EOF)
10781 return(-1);
10782 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10783 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10784 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10785 }
10786
10787 /*
10788 * The Misc part of the Prolog
10789 */
10790 xmlParseMisc(ctxt);
10791
10792 /*
10793 * Then possibly doc type declaration(s) and more Misc
10794 * (doctypedecl Misc*)?
10795 */
10796 GROW;
10797 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10798
10799 ctxt->inSubset = 1;
10800 xmlParseDocTypeDecl(ctxt);
10801 if (RAW == '[') {
10802 ctxt->instate = XML_PARSER_DTD;
10803 xmlParseInternalSubset(ctxt);
10804 if (ctxt->instate == XML_PARSER_EOF)
10805 return(-1);
10806 }
10807
10808 /*
10809 * Create and update the external subset.
10810 */
10811 ctxt->inSubset = 2;
10812 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10813 (!ctxt->disableSAX))
10814 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10815 ctxt->extSubSystem, ctxt->extSubURI);
10816 if (ctxt->instate == XML_PARSER_EOF)
10817 return(-1);
10818 ctxt->inSubset = 0;
10819
10820 xmlCleanSpecialAttr(ctxt);
10821
10822 ctxt->instate = XML_PARSER_PROLOG;
10823 xmlParseMisc(ctxt);
10824 }
10825
10826 /*
10827 * Time to start parsing the tree itself
10828 */
10829 GROW;
10830 if (RAW != '<') {
10831 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10832 "Start tag expected, '<' not found\n");
10833 } else {
10834 ctxt->instate = XML_PARSER_CONTENT;
10835 xmlParseElement(ctxt);
10836 ctxt->instate = XML_PARSER_EPILOG;
10837
10838
10839 /*
10840 * The Misc part at the end
10841 */
10842 xmlParseMisc(ctxt);
10843
10844 if (RAW != 0) {
10845 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10846 }
10847 ctxt->instate = XML_PARSER_EOF;
10848 }
10849
10850 /*
10851 * SAX: end of the document processing.
10852 */
10853 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10854 ctxt->sax->endDocument(ctxt->userData);
10855
10856 /*
10857 * Remove locally kept entity definitions if the tree was not built
10858 */
10859 if ((ctxt->myDoc != NULL) &&
10860 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10861 xmlFreeDoc(ctxt->myDoc);
10862 ctxt->myDoc = NULL;
10863 }
10864
10865 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10866 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10867 if (ctxt->valid)
10868 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10869 if (ctxt->nsWellFormed)
10870 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10871 if (ctxt->options & XML_PARSE_OLD10)
10872 ctxt->myDoc->properties |= XML_DOC_OLD10;
10873 }
10874 if (! ctxt->wellFormed) {
10875 ctxt->valid = 0;
10876 return(-1);
10877 }
10878 return(0);
10879 }
10880
10881 /**
10882 * xmlParseExtParsedEnt:
10883 * @ctxt: an XML parser context
10884 *
10885 * parse a general parsed entity
10886 * An external general parsed entity is well-formed if it matches the
10887 * production labeled extParsedEnt.
10888 *
10889 * [78] extParsedEnt ::= TextDecl? content
10890 *
10891 * Returns 0, -1 in case of error. the parser context is augmented
10892 * as a result of the parsing.
10893 */
10894
10895 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10896 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10897 xmlChar start[4];
10898 xmlCharEncoding enc;
10899
10900 if ((ctxt == NULL) || (ctxt->input == NULL))
10901 return(-1);
10902
10903 xmlDefaultSAXHandlerInit();
10904
10905 xmlDetectSAX2(ctxt);
10906
10907 GROW;
10908
10909 /*
10910 * SAX: beginning of the document processing.
10911 */
10912 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10913 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10914
10915 /*
10916 * Get the 4 first bytes and decode the charset
10917 * if enc != XML_CHAR_ENCODING_NONE
10918 * plug some encoding conversion routines.
10919 */
10920 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10921 start[0] = RAW;
10922 start[1] = NXT(1);
10923 start[2] = NXT(2);
10924 start[3] = NXT(3);
10925 enc = xmlDetectCharEncoding(start, 4);
10926 if (enc != XML_CHAR_ENCODING_NONE) {
10927 xmlSwitchEncoding(ctxt, enc);
10928 }
10929 }
10930
10931
10932 if (CUR == 0) {
10933 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10934 }
10935
10936 /*
10937 * Check for the XMLDecl in the Prolog.
10938 */
10939 GROW;
10940 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10941
10942 /*
10943 * Note that we will switch encoding on the fly.
10944 */
10945 xmlParseXMLDecl(ctxt);
10946 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10947 /*
10948 * The XML REC instructs us to stop parsing right here
10949 */
10950 return(-1);
10951 }
10952 SKIP_BLANKS;
10953 } else {
10954 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10955 }
10956 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10957 ctxt->sax->startDocument(ctxt->userData);
10958 if (ctxt->instate == XML_PARSER_EOF)
10959 return(-1);
10960
10961 /*
10962 * Doing validity checking on chunk doesn't make sense
10963 */
10964 ctxt->instate = XML_PARSER_CONTENT;
10965 ctxt->validate = 0;
10966 ctxt->loadsubset = 0;
10967 ctxt->depth = 0;
10968
10969 xmlParseContent(ctxt);
10970 if (ctxt->instate == XML_PARSER_EOF)
10971 return(-1);
10972
10973 if ((RAW == '<') && (NXT(1) == '/')) {
10974 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10975 } else if (RAW != 0) {
10976 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10977 }
10978
10979 /*
10980 * SAX: end of the document processing.
10981 */
10982 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10983 ctxt->sax->endDocument(ctxt->userData);
10984
10985 if (! ctxt->wellFormed) return(-1);
10986 return(0);
10987 }
10988
10989 #ifdef LIBXML_PUSH_ENABLED
10990 /************************************************************************
10991 * *
10992 * Progressive parsing interfaces *
10993 * *
10994 ************************************************************************/
10995
10996 /**
10997 * xmlParseLookupSequence:
10998 * @ctxt: an XML parser context
10999 * @first: the first char to lookup
11000 * @next: the next char to lookup or zero
11001 * @third: the next char to lookup or zero
11002 *
11003 * Try to find if a sequence (first, next, third) or just (first next) or
11004 * (first) is available in the input stream.
11005 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11006 * to avoid rescanning sequences of bytes, it DOES change the state of the
11007 * parser, do not use liberally.
11008 *
11009 * Returns the index to the current parsing point if the full sequence
11010 * is available, -1 otherwise.
11011 */
11012 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11013 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11014 xmlChar next, xmlChar third) {
11015 int base, len;
11016 xmlParserInputPtr in;
11017 const xmlChar *buf;
11018
11019 in = ctxt->input;
11020 if (in == NULL) return(-1);
11021 base = in->cur - in->base;
11022 if (base < 0) return(-1);
11023 if (ctxt->checkIndex > base)
11024 base = ctxt->checkIndex;
11025 if (in->buf == NULL) {
11026 buf = in->base;
11027 len = in->length;
11028 } else {
11029 buf = xmlBufContent(in->buf->buffer);
11030 len = xmlBufUse(in->buf->buffer);
11031 }
11032 /* take into account the sequence length */
11033 if (third) len -= 2;
11034 else if (next) len --;
11035 for (;base < len;base++) {
11036 if (buf[base] == first) {
11037 if (third != 0) {
11038 if ((buf[base + 1] != next) ||
11039 (buf[base + 2] != third)) continue;
11040 } else if (next != 0) {
11041 if (buf[base + 1] != next) continue;
11042 }
11043 ctxt->checkIndex = 0;
11044 #ifdef DEBUG_PUSH
11045 if (next == 0)
11046 xmlGenericError(xmlGenericErrorContext,
11047 "PP: lookup '%c' found at %d\n",
11048 first, base);
11049 else if (third == 0)
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: lookup '%c%c' found at %d\n",
11052 first, next, base);
11053 else
11054 xmlGenericError(xmlGenericErrorContext,
11055 "PP: lookup '%c%c%c' found at %d\n",
11056 first, next, third, base);
11057 #endif
11058 return(base - (in->cur - in->base));
11059 }
11060 }
11061 ctxt->checkIndex = base;
11062 #ifdef DEBUG_PUSH
11063 if (next == 0)
11064 xmlGenericError(xmlGenericErrorContext,
11065 "PP: lookup '%c' failed\n", first);
11066 else if (third == 0)
11067 xmlGenericError(xmlGenericErrorContext,
11068 "PP: lookup '%c%c' failed\n", first, next);
11069 else
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: lookup '%c%c%c' failed\n", first, next, third);
11072 #endif
11073 return(-1);
11074 }
11075
11076 /**
11077 * xmlParseGetLasts:
11078 * @ctxt: an XML parser context
11079 * @lastlt: pointer to store the last '<' from the input
11080 * @lastgt: pointer to store the last '>' from the input
11081 *
11082 * Lookup the last < and > in the current chunk
11083 */
11084 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11085 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11086 const xmlChar **lastgt) {
11087 const xmlChar *tmp;
11088
11089 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11090 xmlGenericError(xmlGenericErrorContext,
11091 "Internal error: xmlParseGetLasts\n");
11092 return;
11093 }
11094 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11095 tmp = ctxt->input->end;
11096 tmp--;
11097 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11098 if (tmp < ctxt->input->base) {
11099 *lastlt = NULL;
11100 *lastgt = NULL;
11101 } else {
11102 *lastlt = tmp;
11103 tmp++;
11104 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11105 if (*tmp == '\'') {
11106 tmp++;
11107 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11108 if (tmp < ctxt->input->end) tmp++;
11109 } else if (*tmp == '"') {
11110 tmp++;
11111 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11112 if (tmp < ctxt->input->end) tmp++;
11113 } else
11114 tmp++;
11115 }
11116 if (tmp < ctxt->input->end)
11117 *lastgt = tmp;
11118 else {
11119 tmp = *lastlt;
11120 tmp--;
11121 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11122 if (tmp >= ctxt->input->base)
11123 *lastgt = tmp;
11124 else
11125 *lastgt = NULL;
11126 }
11127 }
11128 } else {
11129 *lastlt = NULL;
11130 *lastgt = NULL;
11131 }
11132 }
11133 /**
11134 * xmlCheckCdataPush:
11135 * @cur: pointer to the block of characters
11136 * @len: length of the block in bytes
11137 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11138 *
11139 * Check that the block of characters is okay as SCdata content [20]
11140 *
11141 * Returns the number of bytes to pass if okay, a negative index where an
11142 * UTF-8 error occurred otherwise
11143 */
11144 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11145 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11146 int ix;
11147 unsigned char c;
11148 int codepoint;
11149
11150 if ((utf == NULL) || (len <= 0))
11151 return(0);
11152
11153 for (ix = 0; ix < len;) { /* string is 0-terminated */
11154 c = utf[ix];
11155 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11156 if (c >= 0x20)
11157 ix++;
11158 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11159 ix++;
11160 else
11161 return(-ix);
11162 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11163 if (ix + 2 > len) return(complete ? -ix : ix);
11164 if ((utf[ix+1] & 0xc0 ) != 0x80)
11165 return(-ix);
11166 codepoint = (utf[ix] & 0x1f) << 6;
11167 codepoint |= utf[ix+1] & 0x3f;
11168 if (!xmlIsCharQ(codepoint))
11169 return(-ix);
11170 ix += 2;
11171 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11172 if (ix + 3 > len) return(complete ? -ix : ix);
11173 if (((utf[ix+1] & 0xc0) != 0x80) ||
11174 ((utf[ix+2] & 0xc0) != 0x80))
11175 return(-ix);
11176 codepoint = (utf[ix] & 0xf) << 12;
11177 codepoint |= (utf[ix+1] & 0x3f) << 6;
11178 codepoint |= utf[ix+2] & 0x3f;
11179 if (!xmlIsCharQ(codepoint))
11180 return(-ix);
11181 ix += 3;
11182 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11183 if (ix + 4 > len) return(complete ? -ix : ix);
11184 if (((utf[ix+1] & 0xc0) != 0x80) ||
11185 ((utf[ix+2] & 0xc0) != 0x80) ||
11186 ((utf[ix+3] & 0xc0) != 0x80))
11187 return(-ix);
11188 codepoint = (utf[ix] & 0x7) << 18;
11189 codepoint |= (utf[ix+1] & 0x3f) << 12;
11190 codepoint |= (utf[ix+2] & 0x3f) << 6;
11191 codepoint |= utf[ix+3] & 0x3f;
11192 if (!xmlIsCharQ(codepoint))
11193 return(-ix);
11194 ix += 4;
11195 } else /* unknown encoding */
11196 return(-ix);
11197 }
11198 return(ix);
11199 }
11200
11201 /**
11202 * xmlParseTryOrFinish:
11203 * @ctxt: an XML parser context
11204 * @terminate: last chunk indicator
11205 *
11206 * Try to progress on parsing
11207 *
11208 * Returns zero if no parsing was possible
11209 */
11210 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11211 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11212 int ret = 0;
11213 int avail, tlen;
11214 xmlChar cur, next;
11215 const xmlChar *lastlt, *lastgt;
11216
11217 if (ctxt->input == NULL)
11218 return(0);
11219
11220 #ifdef DEBUG_PUSH
11221 switch (ctxt->instate) {
11222 case XML_PARSER_EOF:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try EOF\n"); break;
11225 case XML_PARSER_START:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try START\n"); break;
11228 case XML_PARSER_MISC:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try MISC\n");break;
11231 case XML_PARSER_COMMENT:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try COMMENT\n");break;
11234 case XML_PARSER_PROLOG:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try PROLOG\n");break;
11237 case XML_PARSER_START_TAG:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try START_TAG\n");break;
11240 case XML_PARSER_CONTENT:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try CONTENT\n");break;
11243 case XML_PARSER_CDATA_SECTION:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try CDATA_SECTION\n");break;
11246 case XML_PARSER_END_TAG:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try END_TAG\n");break;
11249 case XML_PARSER_ENTITY_DECL:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try ENTITY_DECL\n");break;
11252 case XML_PARSER_ENTITY_VALUE:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try ENTITY_VALUE\n");break;
11255 case XML_PARSER_ATTRIBUTE_VALUE:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try ATTRIBUTE_VALUE\n");break;
11258 case XML_PARSER_DTD:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try DTD\n");break;
11261 case XML_PARSER_EPILOG:
11262 xmlGenericError(xmlGenericErrorContext,
11263 "PP: try EPILOG\n");break;
11264 case XML_PARSER_PI:
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: try PI\n");break;
11267 case XML_PARSER_IGNORE:
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: try IGNORE\n");break;
11270 }
11271 #endif
11272
11273 if ((ctxt->input != NULL) &&
11274 (ctxt->input->cur - ctxt->input->base > 4096)) {
11275 xmlSHRINK(ctxt);
11276 ctxt->checkIndex = 0;
11277 }
11278 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11279
11280 while (ctxt->instate != XML_PARSER_EOF) {
11281 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11282 return(0);
11283
11284 if (ctxt->input == NULL) break;
11285 if (ctxt->input->buf == NULL)
11286 avail = ctxt->input->length -
11287 (ctxt->input->cur - ctxt->input->base);
11288 else {
11289 /*
11290 * If we are operating on converted input, try to flush
11291 * remaining chars to avoid them stalling in the non-converted
11292 * buffer. But do not do this in document start where
11293 * encoding="..." may not have been read and we work on a
11294 * guessed encoding.
11295 */
11296 if ((ctxt->instate != XML_PARSER_START) &&
11297 (ctxt->input->buf->raw != NULL) &&
11298 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11299 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11300 ctxt->input);
11301 size_t current = ctxt->input->cur - ctxt->input->base;
11302
11303 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11305 base, current);
11306 }
11307 avail = xmlBufUse(ctxt->input->buf->buffer) -
11308 (ctxt->input->cur - ctxt->input->base);
11309 }
11310 if (avail < 1)
11311 goto done;
11312 switch (ctxt->instate) {
11313 case XML_PARSER_EOF:
11314 /*
11315 * Document parsing is done !
11316 */
11317 goto done;
11318 case XML_PARSER_START:
11319 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11320 xmlChar start[4];
11321 xmlCharEncoding enc;
11322
11323 /*
11324 * Very first chars read from the document flow.
11325 */
11326 if (avail < 4)
11327 goto done;
11328
11329 /*
11330 * Get the 4 first bytes and decode the charset
11331 * if enc != XML_CHAR_ENCODING_NONE
11332 * plug some encoding conversion routines,
11333 * else xmlSwitchEncoding will set to (default)
11334 * UTF8.
11335 */
11336 start[0] = RAW;
11337 start[1] = NXT(1);
11338 start[2] = NXT(2);
11339 start[3] = NXT(3);
11340 enc = xmlDetectCharEncoding(start, 4);
11341 xmlSwitchEncoding(ctxt, enc);
11342 break;
11343 }
11344
11345 if (avail < 2)
11346 goto done;
11347 cur = ctxt->input->cur[0];
11348 next = ctxt->input->cur[1];
11349 if (cur == 0) {
11350 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11351 ctxt->sax->setDocumentLocator(ctxt->userData,
11352 &xmlDefaultSAXLocator);
11353 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11354 xmlHaltParser(ctxt);
11355 #ifdef DEBUG_PUSH
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: entering EOF\n");
11358 #endif
11359 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11360 ctxt->sax->endDocument(ctxt->userData);
11361 goto done;
11362 }
11363 if ((cur == '<') && (next == '?')) {
11364 /* PI or XML decl */
11365 if (avail < 5) return(ret);
11366 if ((!terminate) &&
11367 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11368 return(ret);
11369 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11370 ctxt->sax->setDocumentLocator(ctxt->userData,
11371 &xmlDefaultSAXLocator);
11372 if ((ctxt->input->cur[2] == 'x') &&
11373 (ctxt->input->cur[3] == 'm') &&
11374 (ctxt->input->cur[4] == 'l') &&
11375 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11376 ret += 5;
11377 #ifdef DEBUG_PUSH
11378 xmlGenericError(xmlGenericErrorContext,
11379 "PP: Parsing XML Decl\n");
11380 #endif
11381 xmlParseXMLDecl(ctxt);
11382 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11383 /*
11384 * The XML REC instructs us to stop parsing right
11385 * here
11386 */
11387 xmlHaltParser(ctxt);
11388 return(0);
11389 }
11390 ctxt->standalone = ctxt->input->standalone;
11391 if ((ctxt->encoding == NULL) &&
11392 (ctxt->input->encoding != NULL))
11393 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11394 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11395 (!ctxt->disableSAX))
11396 ctxt->sax->startDocument(ctxt->userData);
11397 ctxt->instate = XML_PARSER_MISC;
11398 #ifdef DEBUG_PUSH
11399 xmlGenericError(xmlGenericErrorContext,
11400 "PP: entering MISC\n");
11401 #endif
11402 } else {
11403 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11404 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->startDocument(ctxt->userData);
11407 ctxt->instate = XML_PARSER_MISC;
11408 #ifdef DEBUG_PUSH
11409 xmlGenericError(xmlGenericErrorContext,
11410 "PP: entering MISC\n");
11411 #endif
11412 }
11413 } else {
11414 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11415 ctxt->sax->setDocumentLocator(ctxt->userData,
11416 &xmlDefaultSAXLocator);
11417 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11418 if (ctxt->version == NULL) {
11419 xmlErrMemory(ctxt, NULL);
11420 break;
11421 }
11422 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11423 (!ctxt->disableSAX))
11424 ctxt->sax->startDocument(ctxt->userData);
11425 ctxt->instate = XML_PARSER_MISC;
11426 #ifdef DEBUG_PUSH
11427 xmlGenericError(xmlGenericErrorContext,
11428 "PP: entering MISC\n");
11429 #endif
11430 }
11431 break;
11432 case XML_PARSER_START_TAG: {
11433 const xmlChar *name;
11434 const xmlChar *prefix = NULL;
11435 const xmlChar *URI = NULL;
11436 int line = ctxt->input->line;
11437 int nsNr = ctxt->nsNr;
11438
11439 if ((avail < 2) && (ctxt->inputNr == 1))
11440 goto done;
11441 cur = ctxt->input->cur[0];
11442 if (cur != '<') {
11443 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11444 xmlHaltParser(ctxt);
11445 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446 ctxt->sax->endDocument(ctxt->userData);
11447 goto done;
11448 }
11449 if (!terminate) {
11450 if (ctxt->progressive) {
11451 /* > can be found unescaped in attribute values */
11452 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11453 goto done;
11454 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11455 goto done;
11456 }
11457 }
11458 if (ctxt->spaceNr == 0)
11459 spacePush(ctxt, -1);
11460 else if (*ctxt->space == -2)
11461 spacePush(ctxt, -1);
11462 else
11463 spacePush(ctxt, *ctxt->space);
11464 #ifdef LIBXML_SAX1_ENABLED
11465 if (ctxt->sax2)
11466 #endif /* LIBXML_SAX1_ENABLED */
11467 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11468 #ifdef LIBXML_SAX1_ENABLED
11469 else
11470 name = xmlParseStartTag(ctxt);
11471 #endif /* LIBXML_SAX1_ENABLED */
11472 if (ctxt->instate == XML_PARSER_EOF)
11473 goto done;
11474 if (name == NULL) {
11475 spacePop(ctxt);
11476 xmlHaltParser(ctxt);
11477 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11478 ctxt->sax->endDocument(ctxt->userData);
11479 goto done;
11480 }
11481 #ifdef LIBXML_VALID_ENABLED
11482 /*
11483 * [ VC: Root Element Type ]
11484 * The Name in the document type declaration must match
11485 * the element type of the root element.
11486 */
11487 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11488 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11489 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11490 #endif /* LIBXML_VALID_ENABLED */
11491
11492 /*
11493 * Check for an Empty Element.
11494 */
11495 if ((RAW == '/') && (NXT(1) == '>')) {
11496 SKIP(2);
11497
11498 if (ctxt->sax2) {
11499 if ((ctxt->sax != NULL) &&
11500 (ctxt->sax->endElementNs != NULL) &&
11501 (!ctxt->disableSAX))
11502 ctxt->sax->endElementNs(ctxt->userData, name,
11503 prefix, URI);
11504 if (ctxt->nsNr - nsNr > 0)
11505 nsPop(ctxt, ctxt->nsNr - nsNr);
11506 #ifdef LIBXML_SAX1_ENABLED
11507 } else {
11508 if ((ctxt->sax != NULL) &&
11509 (ctxt->sax->endElement != NULL) &&
11510 (!ctxt->disableSAX))
11511 ctxt->sax->endElement(ctxt->userData, name);
11512 #endif /* LIBXML_SAX1_ENABLED */
11513 }
11514 if (ctxt->instate == XML_PARSER_EOF)
11515 goto done;
11516 spacePop(ctxt);
11517 if (ctxt->nameNr == 0) {
11518 ctxt->instate = XML_PARSER_EPILOG;
11519 } else {
11520 ctxt->instate = XML_PARSER_CONTENT;
11521 }
11522 ctxt->progressive = 1;
11523 break;
11524 }
11525 if (RAW == '>') {
11526 NEXT;
11527 } else {
11528 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11529 "Couldn't find end of Start Tag %s\n",
11530 name);
11531 nodePop(ctxt);
11532 spacePop(ctxt);
11533 }
11534 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11535
11536 ctxt->instate = XML_PARSER_CONTENT;
11537 ctxt->progressive = 1;
11538 break;
11539 }
11540 case XML_PARSER_CONTENT: {
11541 const xmlChar *test;
11542 unsigned int cons;
11543 if ((avail < 2) && (ctxt->inputNr == 1))
11544 goto done;
11545 cur = ctxt->input->cur[0];
11546 next = ctxt->input->cur[1];
11547
11548 test = CUR_PTR;
11549 cons = ctxt->input->consumed;
11550 if ((cur == '<') && (next == '/')) {
11551 ctxt->instate = XML_PARSER_END_TAG;
11552 break;
11553 } else if ((cur == '<') && (next == '?')) {
11554 if ((!terminate) &&
11555 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11556 ctxt->progressive = XML_PARSER_PI;
11557 goto done;
11558 }
11559 xmlParsePI(ctxt);
11560 ctxt->instate = XML_PARSER_CONTENT;
11561 ctxt->progressive = 1;
11562 } else if ((cur == '<') && (next != '!')) {
11563 ctxt->instate = XML_PARSER_START_TAG;
11564 break;
11565 } else if ((cur == '<') && (next == '!') &&
11566 (ctxt->input->cur[2] == '-') &&
11567 (ctxt->input->cur[3] == '-')) {
11568 int term;
11569
11570 if (avail < 4)
11571 goto done;
11572 ctxt->input->cur += 4;
11573 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11574 ctxt->input->cur -= 4;
11575 if ((!terminate) && (term < 0)) {
11576 ctxt->progressive = XML_PARSER_COMMENT;
11577 goto done;
11578 }
11579 xmlParseComment(ctxt);
11580 ctxt->instate = XML_PARSER_CONTENT;
11581 ctxt->progressive = 1;
11582 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11583 (ctxt->input->cur[2] == '[') &&
11584 (ctxt->input->cur[3] == 'C') &&
11585 (ctxt->input->cur[4] == 'D') &&
11586 (ctxt->input->cur[5] == 'A') &&
11587 (ctxt->input->cur[6] == 'T') &&
11588 (ctxt->input->cur[7] == 'A') &&
11589 (ctxt->input->cur[8] == '[')) {
11590 SKIP(9);
11591 ctxt->instate = XML_PARSER_CDATA_SECTION;
11592 break;
11593 } else if ((cur == '<') && (next == '!') &&
11594 (avail < 9)) {
11595 goto done;
11596 } else if (cur == '&') {
11597 if ((!terminate) &&
11598 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11599 goto done;
11600 xmlParseReference(ctxt);
11601 } else {
11602 /* TODO Avoid the extra copy, handle directly !!! */
11603 /*
11604 * Goal of the following test is:
11605 * - minimize calls to the SAX 'character' callback
11606 * when they are mergeable
11607 * - handle an problem for isBlank when we only parse
11608 * a sequence of blank chars and the next one is
11609 * not available to check against '<' presence.
11610 * - tries to homogenize the differences in SAX
11611 * callbacks between the push and pull versions
11612 * of the parser.
11613 */
11614 if ((ctxt->inputNr == 1) &&
11615 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11616 if (!terminate) {
11617 if (ctxt->progressive) {
11618 if ((lastlt == NULL) ||
11619 (ctxt->input->cur > lastlt))
11620 goto done;
11621 } else if (xmlParseLookupSequence(ctxt,
11622 '<', 0, 0) < 0) {
11623 goto done;
11624 }
11625 }
11626 }
11627 ctxt->checkIndex = 0;
11628 xmlParseCharData(ctxt, 0);
11629 }
11630 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11631 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11632 "detected an error in element content\n");
11633 xmlHaltParser(ctxt);
11634 break;
11635 }
11636 break;
11637 }
11638 case XML_PARSER_END_TAG:
11639 if (avail < 2)
11640 goto done;
11641 if (!terminate) {
11642 if (ctxt->progressive) {
11643 /* > can be found unescaped in attribute values */
11644 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11645 goto done;
11646 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11647 goto done;
11648 }
11649 }
11650 if (ctxt->sax2) {
11651 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11652 nameNsPop(ctxt);
11653 }
11654 #ifdef LIBXML_SAX1_ENABLED
11655 else
11656 xmlParseEndTag1(ctxt, 0);
11657 #endif /* LIBXML_SAX1_ENABLED */
11658 if (ctxt->instate == XML_PARSER_EOF) {
11659 /* Nothing */
11660 } else if (ctxt->nameNr == 0) {
11661 ctxt->instate = XML_PARSER_EPILOG;
11662 } else {
11663 ctxt->instate = XML_PARSER_CONTENT;
11664 }
11665 break;
11666 case XML_PARSER_CDATA_SECTION: {
11667 /*
11668 * The Push mode need to have the SAX callback for
11669 * cdataBlock merge back contiguous callbacks.
11670 */
11671 int base;
11672
11673 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11674 if (base < 0) {
11675 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11676 int tmp;
11677
11678 tmp = xmlCheckCdataPush(ctxt->input->cur,
11679 XML_PARSER_BIG_BUFFER_SIZE, 0);
11680 if (tmp < 0) {
11681 tmp = -tmp;
11682 ctxt->input->cur += tmp;
11683 goto encoding_error;
11684 }
11685 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11686 if (ctxt->sax->cdataBlock != NULL)
11687 ctxt->sax->cdataBlock(ctxt->userData,
11688 ctxt->input->cur, tmp);
11689 else if (ctxt->sax->characters != NULL)
11690 ctxt->sax->characters(ctxt->userData,
11691 ctxt->input->cur, tmp);
11692 }
11693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
11695 SKIPL(tmp);
11696 ctxt->checkIndex = 0;
11697 }
11698 goto done;
11699 } else {
11700 int tmp;
11701
11702 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11703 if ((tmp < 0) || (tmp != base)) {
11704 tmp = -tmp;
11705 ctxt->input->cur += tmp;
11706 goto encoding_error;
11707 }
11708 if ((ctxt->sax != NULL) && (base == 0) &&
11709 (ctxt->sax->cdataBlock != NULL) &&
11710 (!ctxt->disableSAX)) {
11711 /*
11712 * Special case to provide identical behaviour
11713 * between pull and push parsers on enpty CDATA
11714 * sections
11715 */
11716 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11717 (!strncmp((const char *)&ctxt->input->cur[-9],
11718 "<![CDATA[", 9)))
11719 ctxt->sax->cdataBlock(ctxt->userData,
11720 BAD_CAST "", 0);
11721 } else if ((ctxt->sax != NULL) && (base > 0) &&
11722 (!ctxt->disableSAX)) {
11723 if (ctxt->sax->cdataBlock != NULL)
11724 ctxt->sax->cdataBlock(ctxt->userData,
11725 ctxt->input->cur, base);
11726 else if (ctxt->sax->characters != NULL)
11727 ctxt->sax->characters(ctxt->userData,
11728 ctxt->input->cur, base);
11729 }
11730 if (ctxt->instate == XML_PARSER_EOF)
11731 goto done;
11732 SKIPL(base + 3);
11733 ctxt->checkIndex = 0;
11734 ctxt->instate = XML_PARSER_CONTENT;
11735 #ifdef DEBUG_PUSH
11736 xmlGenericError(xmlGenericErrorContext,
11737 "PP: entering CONTENT\n");
11738 #endif
11739 }
11740 break;
11741 }
11742 case XML_PARSER_MISC:
11743 SKIP_BLANKS;
11744 if (ctxt->input->buf == NULL)
11745 avail = ctxt->input->length -
11746 (ctxt->input->cur - ctxt->input->base);
11747 else
11748 avail = xmlBufUse(ctxt->input->buf->buffer) -
11749 (ctxt->input->cur - ctxt->input->base);
11750 if (avail < 2)
11751 goto done;
11752 cur = ctxt->input->cur[0];
11753 next = ctxt->input->cur[1];
11754 if ((cur == '<') && (next == '?')) {
11755 if ((!terminate) &&
11756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11757 ctxt->progressive = XML_PARSER_PI;
11758 goto done;
11759 }
11760 #ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext,
11762 "PP: Parsing PI\n");
11763 #endif
11764 xmlParsePI(ctxt);
11765 if (ctxt->instate == XML_PARSER_EOF)
11766 goto done;
11767 ctxt->instate = XML_PARSER_MISC;
11768 ctxt->progressive = 1;
11769 ctxt->checkIndex = 0;
11770 } else if ((cur == '<') && (next == '!') &&
11771 (ctxt->input->cur[2] == '-') &&
11772 (ctxt->input->cur[3] == '-')) {
11773 if ((!terminate) &&
11774 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11775 ctxt->progressive = XML_PARSER_COMMENT;
11776 goto done;
11777 }
11778 #ifdef DEBUG_PUSH
11779 xmlGenericError(xmlGenericErrorContext,
11780 "PP: Parsing Comment\n");
11781 #endif
11782 xmlParseComment(ctxt);
11783 if (ctxt->instate == XML_PARSER_EOF)
11784 goto done;
11785 ctxt->instate = XML_PARSER_MISC;
11786 ctxt->progressive = 1;
11787 ctxt->checkIndex = 0;
11788 } else if ((cur == '<') && (next == '!') &&
11789 (ctxt->input->cur[2] == 'D') &&
11790 (ctxt->input->cur[3] == 'O') &&
11791 (ctxt->input->cur[4] == 'C') &&
11792 (ctxt->input->cur[5] == 'T') &&
11793 (ctxt->input->cur[6] == 'Y') &&
11794 (ctxt->input->cur[7] == 'P') &&
11795 (ctxt->input->cur[8] == 'E')) {
11796 if ((!terminate) &&
11797 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11798 ctxt->progressive = XML_PARSER_DTD;
11799 goto done;
11800 }
11801 #ifdef DEBUG_PUSH
11802 xmlGenericError(xmlGenericErrorContext,
11803 "PP: Parsing internal subset\n");
11804 #endif
11805 ctxt->inSubset = 1;
11806 ctxt->progressive = 0;
11807 ctxt->checkIndex = 0;
11808 xmlParseDocTypeDecl(ctxt);
11809 if (ctxt->instate == XML_PARSER_EOF)
11810 goto done;
11811 if (RAW == '[') {
11812 ctxt->instate = XML_PARSER_DTD;
11813 #ifdef DEBUG_PUSH
11814 xmlGenericError(xmlGenericErrorContext,
11815 "PP: entering DTD\n");
11816 #endif
11817 } else {
11818 /*
11819 * Create and update the external subset.
11820 */
11821 ctxt->inSubset = 2;
11822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11823 (ctxt->sax->externalSubset != NULL))
11824 ctxt->sax->externalSubset(ctxt->userData,
11825 ctxt->intSubName, ctxt->extSubSystem,
11826 ctxt->extSubURI);
11827 ctxt->inSubset = 0;
11828 xmlCleanSpecialAttr(ctxt);
11829 ctxt->instate = XML_PARSER_PROLOG;
11830 #ifdef DEBUG_PUSH
11831 xmlGenericError(xmlGenericErrorContext,
11832 "PP: entering PROLOG\n");
11833 #endif
11834 }
11835 } else if ((cur == '<') && (next == '!') &&
11836 (avail < 9)) {
11837 goto done;
11838 } else {
11839 ctxt->instate = XML_PARSER_START_TAG;
11840 ctxt->progressive = XML_PARSER_START_TAG;
11841 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11842 #ifdef DEBUG_PUSH
11843 xmlGenericError(xmlGenericErrorContext,
11844 "PP: entering START_TAG\n");
11845 #endif
11846 }
11847 break;
11848 case XML_PARSER_PROLOG:
11849 SKIP_BLANKS;
11850 if (ctxt->input->buf == NULL)
11851 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11852 else
11853 avail = xmlBufUse(ctxt->input->buf->buffer) -
11854 (ctxt->input->cur - ctxt->input->base);
11855 if (avail < 2)
11856 goto done;
11857 cur = ctxt->input->cur[0];
11858 next = ctxt->input->cur[1];
11859 if ((cur == '<') && (next == '?')) {
11860 if ((!terminate) &&
11861 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11862 ctxt->progressive = XML_PARSER_PI;
11863 goto done;
11864 }
11865 #ifdef DEBUG_PUSH
11866 xmlGenericError(xmlGenericErrorContext,
11867 "PP: Parsing PI\n");
11868 #endif
11869 xmlParsePI(ctxt);
11870 if (ctxt->instate == XML_PARSER_EOF)
11871 goto done;
11872 ctxt->instate = XML_PARSER_PROLOG;
11873 ctxt->progressive = 1;
11874 } else if ((cur == '<') && (next == '!') &&
11875 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11876 if ((!terminate) &&
11877 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11878 ctxt->progressive = XML_PARSER_COMMENT;
11879 goto done;
11880 }
11881 #ifdef DEBUG_PUSH
11882 xmlGenericError(xmlGenericErrorContext,
11883 "PP: Parsing Comment\n");
11884 #endif
11885 xmlParseComment(ctxt);
11886 if (ctxt->instate == XML_PARSER_EOF)
11887 goto done;
11888 ctxt->instate = XML_PARSER_PROLOG;
11889 ctxt->progressive = 1;
11890 } else if ((cur == '<') && (next == '!') &&
11891 (avail < 4)) {
11892 goto done;
11893 } else {
11894 ctxt->instate = XML_PARSER_START_TAG;
11895 if (ctxt->progressive == 0)
11896 ctxt->progressive = XML_PARSER_START_TAG;
11897 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11898 #ifdef DEBUG_PUSH
11899 xmlGenericError(xmlGenericErrorContext,
11900 "PP: entering START_TAG\n");
11901 #endif
11902 }
11903 break;
11904 case XML_PARSER_EPILOG:
11905 SKIP_BLANKS;
11906 if (ctxt->input->buf == NULL)
11907 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11908 else
11909 avail = xmlBufUse(ctxt->input->buf->buffer) -
11910 (ctxt->input->cur - ctxt->input->base);
11911 if (avail < 2)
11912 goto done;
11913 cur = ctxt->input->cur[0];
11914 next = ctxt->input->cur[1];
11915 if ((cur == '<') && (next == '?')) {
11916 if ((!terminate) &&
11917 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11918 ctxt->progressive = XML_PARSER_PI;
11919 goto done;
11920 }
11921 #ifdef DEBUG_PUSH
11922 xmlGenericError(xmlGenericErrorContext,
11923 "PP: Parsing PI\n");
11924 #endif
11925 xmlParsePI(ctxt);
11926 if (ctxt->instate == XML_PARSER_EOF)
11927 goto done;
11928 ctxt->instate = XML_PARSER_EPILOG;
11929 ctxt->progressive = 1;
11930 } else if ((cur == '<') && (next == '!') &&
11931 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11932 if ((!terminate) &&
11933 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11934 ctxt->progressive = XML_PARSER_COMMENT;
11935 goto done;
11936 }
11937 #ifdef DEBUG_PUSH
11938 xmlGenericError(xmlGenericErrorContext,
11939 "PP: Parsing Comment\n");
11940 #endif
11941 xmlParseComment(ctxt);
11942 if (ctxt->instate == XML_PARSER_EOF)
11943 goto done;
11944 ctxt->instate = XML_PARSER_EPILOG;
11945 ctxt->progressive = 1;
11946 } else if ((cur == '<') && (next == '!') &&
11947 (avail < 4)) {
11948 goto done;
11949 } else {
11950 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11951 xmlHaltParser(ctxt);
11952 #ifdef DEBUG_PUSH
11953 xmlGenericError(xmlGenericErrorContext,
11954 "PP: entering EOF\n");
11955 #endif
11956 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11957 ctxt->sax->endDocument(ctxt->userData);
11958 goto done;
11959 }
11960 break;
11961 case XML_PARSER_DTD: {
11962 /*
11963 * Sorry but progressive parsing of the internal subset
11964 * is not expected to be supported. We first check that
11965 * the full content of the internal subset is available and
11966 * the parsing is launched only at that point.
11967 * Internal subset ends up with "']' S? '>'" in an unescaped
11968 * section and not in a ']]>' sequence which are conditional
11969 * sections (whoever argued to keep that crap in XML deserve
11970 * a place in hell !).
11971 */
11972 int base, i;
11973 xmlChar *buf;
11974 xmlChar quote = 0;
11975 size_t use;
11976
11977 base = ctxt->input->cur - ctxt->input->base;
11978 if (base < 0) return(0);
11979 if (ctxt->checkIndex > base)
11980 base = ctxt->checkIndex;
11981 buf = xmlBufContent(ctxt->input->buf->buffer);
11982 use = xmlBufUse(ctxt->input->buf->buffer);
11983 for (;(unsigned int) base < use; base++) {
11984 if (quote != 0) {
11985 if (buf[base] == quote)
11986 quote = 0;
11987 continue;
11988 }
11989 if ((quote == 0) && (buf[base] == '<')) {
11990 int found = 0;
11991 /* special handling of comments */
11992 if (((unsigned int) base + 4 < use) &&
11993 (buf[base + 1] == '!') &&
11994 (buf[base + 2] == '-') &&
11995 (buf[base + 3] == '-')) {
11996 for (;(unsigned int) base + 3 < use; base++) {
11997 if ((buf[base] == '-') &&
11998 (buf[base + 1] == '-') &&
11999 (buf[base + 2] == '>')) {
12000 found = 1;
12001 base += 2;
12002 break;
12003 }
12004 }
12005 if (!found) {
12006 #if 0
12007 fprintf(stderr, "unfinished comment\n");
12008 #endif
12009 break; /* for */
12010 }
12011 continue;
12012 }
12013 }
12014 if (buf[base] == '"') {
12015 quote = '"';
12016 continue;
12017 }
12018 if (buf[base] == '\'') {
12019 quote = '\'';
12020 continue;
12021 }
12022 if (buf[base] == ']') {
12023 #if 0
12024 fprintf(stderr, "%c%c%c%c: ", buf[base],
12025 buf[base + 1], buf[base + 2], buf[base + 3]);
12026 #endif
12027 if ((unsigned int) base +1 >= use)
12028 break;
12029 if (buf[base + 1] == ']') {
12030 /* conditional crap, skip both ']' ! */
12031 base++;
12032 continue;
12033 }
12034 for (i = 1; (unsigned int) base + i < use; i++) {
12035 if (buf[base + i] == '>') {
12036 #if 0
12037 fprintf(stderr, "found\n");
12038 #endif
12039 goto found_end_int_subset;
12040 }
12041 if (!IS_BLANK_CH(buf[base + i])) {
12042 #if 0
12043 fprintf(stderr, "not found\n");
12044 #endif
12045 goto not_end_of_int_subset;
12046 }
12047 }
12048 #if 0
12049 fprintf(stderr, "end of stream\n");
12050 #endif
12051 break;
12052
12053 }
12054 not_end_of_int_subset:
12055 continue; /* for */
12056 }
12057 /*
12058 * We didn't found the end of the Internal subset
12059 */
12060 if (quote == 0)
12061 ctxt->checkIndex = base;
12062 else
12063 ctxt->checkIndex = 0;
12064 #ifdef DEBUG_PUSH
12065 if (next == 0)
12066 xmlGenericError(xmlGenericErrorContext,
12067 "PP: lookup of int subset end filed\n");
12068 #endif
12069 goto done;
12070
12071 found_end_int_subset:
12072 ctxt->checkIndex = 0;
12073 xmlParseInternalSubset(ctxt);
12074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
12076 ctxt->inSubset = 2;
12077 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12078 (ctxt->sax->externalSubset != NULL))
12079 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12080 ctxt->extSubSystem, ctxt->extSubURI);
12081 ctxt->inSubset = 0;
12082 xmlCleanSpecialAttr(ctxt);
12083 if (ctxt->instate == XML_PARSER_EOF)
12084 goto done;
12085 ctxt->instate = XML_PARSER_PROLOG;
12086 ctxt->checkIndex = 0;
12087 #ifdef DEBUG_PUSH
12088 xmlGenericError(xmlGenericErrorContext,
12089 "PP: entering PROLOG\n");
12090 #endif
12091 break;
12092 }
12093 case XML_PARSER_COMMENT:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == COMMENT\n");
12096 ctxt->instate = XML_PARSER_CONTENT;
12097 #ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering CONTENT\n");
12100 #endif
12101 break;
12102 case XML_PARSER_IGNORE:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == IGNORE");
12105 ctxt->instate = XML_PARSER_DTD;
12106 #ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering DTD\n");
12109 #endif
12110 break;
12111 case XML_PARSER_PI:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == PI\n");
12114 ctxt->instate = XML_PARSER_CONTENT;
12115 #ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering CONTENT\n");
12118 #endif
12119 break;
12120 case XML_PARSER_ENTITY_DECL:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ENTITY_DECL\n");
12123 ctxt->instate = XML_PARSER_DTD;
12124 #ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering DTD\n");
12127 #endif
12128 break;
12129 case XML_PARSER_ENTITY_VALUE:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == ENTITY_VALUE\n");
12132 ctxt->instate = XML_PARSER_CONTENT;
12133 #ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering DTD\n");
12136 #endif
12137 break;
12138 case XML_PARSER_ATTRIBUTE_VALUE:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142 #ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145 #endif
12146 break;
12147 case XML_PARSER_SYSTEM_LITERAL:
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: internal error, state == SYSTEM_LITERAL\n");
12150 ctxt->instate = XML_PARSER_START_TAG;
12151 #ifdef DEBUG_PUSH
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: entering START_TAG\n");
12154 #endif
12155 break;
12156 case XML_PARSER_PUBLIC_LITERAL:
12157 xmlGenericError(xmlGenericErrorContext,
12158 "PP: internal error, state == PUBLIC_LITERAL\n");
12159 ctxt->instate = XML_PARSER_START_TAG;
12160 #ifdef DEBUG_PUSH
12161 xmlGenericError(xmlGenericErrorContext,
12162 "PP: entering START_TAG\n");
12163 #endif
12164 break;
12165 }
12166 }
12167 done:
12168 #ifdef DEBUG_PUSH
12169 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12170 #endif
12171 return(ret);
12172 encoding_error:
12173 if (ctxt->input->end - ctxt->input->cur < 4) {
12174 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12175 "Input is not proper UTF-8, indicate encoding !\n",
12176 NULL, NULL);
12177 } else {
12178 char buffer[150];
12179
12180 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12181 ctxt->input->cur[0], ctxt->input->cur[1],
12182 ctxt->input->cur[2], ctxt->input->cur[3]);
12183 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12184 "Input is not proper UTF-8, indicate encoding !\n%s",
12185 BAD_CAST buffer, NULL);
12186 }
12187 return(0);
12188 }
12189
12190 /**
12191 * xmlParseCheckTransition:
12192 * @ctxt: an XML parser context
12193 * @chunk: a char array
12194 * @size: the size in byte of the chunk
12195 *
12196 * Check depending on the current parser state if the chunk given must be
12197 * processed immediately or one need more data to advance on parsing.
12198 *
12199 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12200 */
12201 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12202 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12203 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12204 return(-1);
12205 if (ctxt->instate == XML_PARSER_START_TAG) {
12206 if (memchr(chunk, '>', size) != NULL)
12207 return(1);
12208 return(0);
12209 }
12210 if (ctxt->progressive == XML_PARSER_COMMENT) {
12211 if (memchr(chunk, '>', size) != NULL)
12212 return(1);
12213 return(0);
12214 }
12215 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12216 if (memchr(chunk, '>', size) != NULL)
12217 return(1);
12218 return(0);
12219 }
12220 if (ctxt->progressive == XML_PARSER_PI) {
12221 if (memchr(chunk, '>', size) != NULL)
12222 return(1);
12223 return(0);
12224 }
12225 if (ctxt->instate == XML_PARSER_END_TAG) {
12226 if (memchr(chunk, '>', size) != NULL)
12227 return(1);
12228 return(0);
12229 }
12230 if ((ctxt->progressive == XML_PARSER_DTD) ||
12231 (ctxt->instate == XML_PARSER_DTD)) {
12232 if (memchr(chunk, '>', size) != NULL)
12233 return(1);
12234 return(0);
12235 }
12236 return(1);
12237 }
12238
12239 /**
12240 * xmlParseChunk:
12241 * @ctxt: an XML parser context
12242 * @chunk: an char array
12243 * @size: the size in byte of the chunk
12244 * @terminate: last chunk indicator
12245 *
12246 * Parse a Chunk of memory
12247 *
12248 * Returns zero if no error, the xmlParserErrors otherwise.
12249 */
12250 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12251 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12252 int terminate) {
12253 int end_in_lf = 0;
12254 int remain = 0;
12255 size_t old_avail = 0;
12256 size_t avail = 0;
12257
12258 if (ctxt == NULL)
12259 return(XML_ERR_INTERNAL_ERROR);
12260 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12261 return(ctxt->errNo);
12262 if (ctxt->instate == XML_PARSER_EOF)
12263 return(-1);
12264 if (ctxt->instate == XML_PARSER_START)
12265 xmlDetectSAX2(ctxt);
12266 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12267 (chunk[size - 1] == '\r')) {
12268 end_in_lf = 1;
12269 size--;
12270 }
12271
12272 xmldecl_done:
12273
12274 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12275 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12276 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12277 size_t cur = ctxt->input->cur - ctxt->input->base;
12278 int res;
12279
12280 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12281 /*
12282 * Specific handling if we autodetected an encoding, we should not
12283 * push more than the first line ... which depend on the encoding
12284 * And only push the rest once the final encoding was detected
12285 */
12286 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12287 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12288 unsigned int len = 45;
12289
12290 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12291 BAD_CAST "UTF-16")) ||
12292 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12293 BAD_CAST "UTF16")))
12294 len = 90;
12295 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12296 BAD_CAST "UCS-4")) ||
12297 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12298 BAD_CAST "UCS4")))
12299 len = 180;
12300
12301 if (ctxt->input->buf->rawconsumed < len)
12302 len -= ctxt->input->buf->rawconsumed;
12303
12304 /*
12305 * Change size for reading the initial declaration only
12306 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12307 * will blindly copy extra bytes from memory.
12308 */
12309 if ((unsigned int) size > len) {
12310 remain = size - len;
12311 size = len;
12312 } else {
12313 remain = 0;
12314 }
12315 }
12316 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12317 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12318 if (res < 0) {
12319 ctxt->errNo = XML_PARSER_EOF;
12320 xmlHaltParser(ctxt);
12321 return (XML_PARSER_EOF);
12322 }
12323 #ifdef DEBUG_PUSH
12324 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12325 #endif
12326
12327 } else if (ctxt->instate != XML_PARSER_EOF) {
12328 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12329 xmlParserInputBufferPtr in = ctxt->input->buf;
12330 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12331 (in->raw != NULL)) {
12332 int nbchars;
12333 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12334 size_t current = ctxt->input->cur - ctxt->input->base;
12335
12336 nbchars = xmlCharEncInput(in, terminate);
12337 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12338 if (nbchars < 0) {
12339 /* TODO 2.6.0 */
12340 xmlGenericError(xmlGenericErrorContext,
12341 "xmlParseChunk: encoder error\n");
12342 xmlHaltParser(ctxt);
12343 return(XML_ERR_INVALID_ENCODING);
12344 }
12345 }
12346 }
12347 }
12348 if (remain != 0) {
12349 xmlParseTryOrFinish(ctxt, 0);
12350 } else {
12351 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12352 avail = xmlBufUse(ctxt->input->buf->buffer);
12353 /*
12354 * Depending on the current state it may not be such
12355 * a good idea to try parsing if there is nothing in the chunk
12356 * which would be worth doing a parser state transition and we
12357 * need to wait for more data
12358 */
12359 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12360 (old_avail == 0) || (avail == 0) ||
12361 (xmlParseCheckTransition(ctxt,
12362 (const char *)&ctxt->input->base[old_avail],
12363 avail - old_avail)))
12364 xmlParseTryOrFinish(ctxt, terminate);
12365 }
12366 if (ctxt->instate == XML_PARSER_EOF)
12367 return(ctxt->errNo);
12368
12369 if ((ctxt->input != NULL) &&
12370 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12371 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12372 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12373 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12374 xmlHaltParser(ctxt);
12375 }
12376 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12377 return(ctxt->errNo);
12378
12379 if (remain != 0) {
12380 chunk += size;
12381 size = remain;
12382 remain = 0;
12383 goto xmldecl_done;
12384 }
12385 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12386 (ctxt->input->buf != NULL)) {
12387 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12388 ctxt->input);
12389 size_t current = ctxt->input->cur - ctxt->input->base;
12390
12391 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12392
12393 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12394 base, current);
12395 }
12396 if (terminate) {
12397 /*
12398 * Check for termination
12399 */
12400 int cur_avail = 0;
12401
12402 if (ctxt->input != NULL) {
12403 if (ctxt->input->buf == NULL)
12404 cur_avail = ctxt->input->length -
12405 (ctxt->input->cur - ctxt->input->base);
12406 else
12407 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12408 (ctxt->input->cur - ctxt->input->base);
12409 }
12410
12411 if ((ctxt->instate != XML_PARSER_EOF) &&
12412 (ctxt->instate != XML_PARSER_EPILOG)) {
12413 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12414 }
12415 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12416 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12417 }
12418 if (ctxt->instate != XML_PARSER_EOF) {
12419 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12420 ctxt->sax->endDocument(ctxt->userData);
12421 }
12422 ctxt->instate = XML_PARSER_EOF;
12423 }
12424 if (ctxt->wellFormed == 0)
12425 return((xmlParserErrors) ctxt->errNo);
12426 else
12427 return(0);
12428 }
12429
12430 /************************************************************************
12431 * *
12432 * I/O front end functions to the parser *
12433 * *
12434 ************************************************************************/
12435
12436 /**
12437 * xmlCreatePushParserCtxt:
12438 * @sax: a SAX handler
12439 * @user_data: The user data returned on SAX callbacks
12440 * @chunk: a pointer to an array of chars
12441 * @size: number of chars in the array
12442 * @filename: an optional file name or URI
12443 *
12444 * Create a parser context for using the XML parser in push mode.
12445 * If @buffer and @size are non-NULL, the data is used to detect
12446 * the encoding. The remaining characters will be parsed so they
12447 * don't need to be fed in again through xmlParseChunk.
12448 * To allow content encoding detection, @size should be >= 4
12449 * The value of @filename is used for fetching external entities
12450 * and error/warning reports.
12451 *
12452 * Returns the new parser context or NULL
12453 */
12454
12455 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12456 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12457 const char *chunk, int size, const char *filename) {
12458 xmlParserCtxtPtr ctxt;
12459 xmlParserInputPtr inputStream;
12460 xmlParserInputBufferPtr buf;
12461 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12462
12463 /*
12464 * plug some encoding conversion routines
12465 */
12466 if ((chunk != NULL) && (size >= 4))
12467 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12468
12469 buf = xmlAllocParserInputBuffer(enc);
12470 if (buf == NULL) return(NULL);
12471
12472 ctxt = xmlNewParserCtxt();
12473 if (ctxt == NULL) {
12474 xmlErrMemory(NULL, "creating parser: out of memory\n");
12475 xmlFreeParserInputBuffer(buf);
12476 return(NULL);
12477 }
12478 ctxt->dictNames = 1;
12479 if (sax != NULL) {
12480 #ifdef LIBXML_SAX1_ENABLED
12481 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12482 #endif /* LIBXML_SAX1_ENABLED */
12483 xmlFree(ctxt->sax);
12484 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12485 if (ctxt->sax == NULL) {
12486 xmlErrMemory(ctxt, NULL);
12487 xmlFreeParserInputBuffer(buf);
12488 xmlFreeParserCtxt(ctxt);
12489 return(NULL);
12490 }
12491 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12492 if (sax->initialized == XML_SAX2_MAGIC)
12493 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12494 else
12495 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12496 if (user_data != NULL)
12497 ctxt->userData = user_data;
12498 }
12499 if (filename == NULL) {
12500 ctxt->directory = NULL;
12501 } else {
12502 ctxt->directory = xmlParserGetDirectory(filename);
12503 }
12504
12505 inputStream = xmlNewInputStream(ctxt);
12506 if (inputStream == NULL) {
12507 xmlFreeParserCtxt(ctxt);
12508 xmlFreeParserInputBuffer(buf);
12509 return(NULL);
12510 }
12511
12512 if (filename == NULL)
12513 inputStream->filename = NULL;
12514 else {
12515 inputStream->filename = (char *)
12516 xmlCanonicPath((const xmlChar *) filename);
12517 if (inputStream->filename == NULL) {
12518 xmlFreeParserCtxt(ctxt);
12519 xmlFreeParserInputBuffer(buf);
12520 return(NULL);
12521 }
12522 }
12523 inputStream->buf = buf;
12524 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12525 inputPush(ctxt, inputStream);
12526
12527 /*
12528 * If the caller didn't provide an initial 'chunk' for determining
12529 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12530 * that it can be automatically determined later
12531 */
12532 if ((size == 0) || (chunk == NULL)) {
12533 ctxt->charset = XML_CHAR_ENCODING_NONE;
12534 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12535 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12536 size_t cur = ctxt->input->cur - ctxt->input->base;
12537
12538 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12539
12540 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12541 #ifdef DEBUG_PUSH
12542 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12543 #endif
12544 }
12545
12546 if (enc != XML_CHAR_ENCODING_NONE) {
12547 xmlSwitchEncoding(ctxt, enc);
12548 }
12549
12550 return(ctxt);
12551 }
12552 #endif /* LIBXML_PUSH_ENABLED */
12553
12554 /**
12555 * xmlHaltParser:
12556 * @ctxt: an XML parser context
12557 *
12558 * Blocks further parser processing don't override error
12559 * for internal use
12560 */
12561 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12562 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12563 if (ctxt == NULL)
12564 return;
12565 ctxt->instate = XML_PARSER_EOF;
12566 ctxt->disableSAX = 1;
12567 while (ctxt->inputNr > 1)
12568 xmlFreeInputStream(inputPop(ctxt));
12569 if (ctxt->input != NULL) {
12570 /*
12571 * in case there was a specific allocation deallocate before
12572 * overriding base
12573 */
12574 if (ctxt->input->free != NULL) {
12575 ctxt->input->free((xmlChar *) ctxt->input->base);
12576 ctxt->input->free = NULL;
12577 }
12578 if (ctxt->input->buf != NULL) {
12579 xmlFreeParserInputBuffer(ctxt->input->buf);
12580 ctxt->input->buf = NULL;
12581 }
12582 ctxt->input->cur = BAD_CAST"";
12583 ctxt->input->length = 0;
12584 ctxt->input->base = ctxt->input->cur;
12585 ctxt->input->end = ctxt->input->cur;
12586 }
12587 }
12588
12589 /**
12590 * xmlStopParser:
12591 * @ctxt: an XML parser context
12592 *
12593 * Blocks further parser processing
12594 */
12595 void
xmlStopParser(xmlParserCtxtPtr ctxt)12596 xmlStopParser(xmlParserCtxtPtr ctxt) {
12597 if (ctxt == NULL)
12598 return;
12599 xmlHaltParser(ctxt);
12600 ctxt->errNo = XML_ERR_USER_STOP;
12601 }
12602
12603 /**
12604 * xmlCreateIOParserCtxt:
12605 * @sax: a SAX handler
12606 * @user_data: The user data returned on SAX callbacks
12607 * @ioread: an I/O read function
12608 * @ioclose: an I/O close function
12609 * @ioctx: an I/O handler
12610 * @enc: the charset encoding if known
12611 *
12612 * Create a parser context for using the XML parser with an existing
12613 * I/O stream
12614 *
12615 * Returns the new parser context or NULL
12616 */
12617 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12618 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12619 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12620 void *ioctx, xmlCharEncoding enc) {
12621 xmlParserCtxtPtr ctxt;
12622 xmlParserInputPtr inputStream;
12623 xmlParserInputBufferPtr buf;
12624
12625 if (ioread == NULL) return(NULL);
12626
12627 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12628 if (buf == NULL) {
12629 if (ioclose != NULL)
12630 ioclose(ioctx);
12631 return (NULL);
12632 }
12633
12634 ctxt = xmlNewParserCtxt();
12635 if (ctxt == NULL) {
12636 xmlFreeParserInputBuffer(buf);
12637 return(NULL);
12638 }
12639 if (sax != NULL) {
12640 #ifdef LIBXML_SAX1_ENABLED
12641 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12642 #endif /* LIBXML_SAX1_ENABLED */
12643 xmlFree(ctxt->sax);
12644 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12645 if (ctxt->sax == NULL) {
12646 xmlFreeParserInputBuffer(buf);
12647 xmlErrMemory(ctxt, NULL);
12648 xmlFreeParserCtxt(ctxt);
12649 return(NULL);
12650 }
12651 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12652 if (sax->initialized == XML_SAX2_MAGIC)
12653 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12654 else
12655 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12656 if (user_data != NULL)
12657 ctxt->userData = user_data;
12658 }
12659
12660 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12661 if (inputStream == NULL) {
12662 xmlFreeParserCtxt(ctxt);
12663 return(NULL);
12664 }
12665 inputPush(ctxt, inputStream);
12666
12667 return(ctxt);
12668 }
12669
12670 #ifdef LIBXML_VALID_ENABLED
12671 /************************************************************************
12672 * *
12673 * Front ends when parsing a DTD *
12674 * *
12675 ************************************************************************/
12676
12677 /**
12678 * xmlIOParseDTD:
12679 * @sax: the SAX handler block or NULL
12680 * @input: an Input Buffer
12681 * @enc: the charset encoding if known
12682 *
12683 * Load and parse a DTD
12684 *
12685 * Returns the resulting xmlDtdPtr or NULL in case of error.
12686 * @input will be freed by the function in any case.
12687 */
12688
12689 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12690 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12691 xmlCharEncoding enc) {
12692 xmlDtdPtr ret = NULL;
12693 xmlParserCtxtPtr ctxt;
12694 xmlParserInputPtr pinput = NULL;
12695 xmlChar start[4];
12696
12697 if (input == NULL)
12698 return(NULL);
12699
12700 ctxt = xmlNewParserCtxt();
12701 if (ctxt == NULL) {
12702 xmlFreeParserInputBuffer(input);
12703 return(NULL);
12704 }
12705
12706 /* We are loading a DTD */
12707 ctxt->options |= XML_PARSE_DTDLOAD;
12708
12709 /*
12710 * Set-up the SAX context
12711 */
12712 if (sax != NULL) {
12713 if (ctxt->sax != NULL)
12714 xmlFree(ctxt->sax);
12715 ctxt->sax = sax;
12716 ctxt->userData = ctxt;
12717 }
12718 xmlDetectSAX2(ctxt);
12719
12720 /*
12721 * generate a parser input from the I/O handler
12722 */
12723
12724 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12725 if (pinput == NULL) {
12726 if (sax != NULL) ctxt->sax = NULL;
12727 xmlFreeParserInputBuffer(input);
12728 xmlFreeParserCtxt(ctxt);
12729 return(NULL);
12730 }
12731
12732 /*
12733 * plug some encoding conversion routines here.
12734 */
12735 if (xmlPushInput(ctxt, pinput) < 0) {
12736 if (sax != NULL) ctxt->sax = NULL;
12737 xmlFreeParserCtxt(ctxt);
12738 return(NULL);
12739 }
12740 if (enc != XML_CHAR_ENCODING_NONE) {
12741 xmlSwitchEncoding(ctxt, enc);
12742 }
12743
12744 pinput->filename = NULL;
12745 pinput->line = 1;
12746 pinput->col = 1;
12747 pinput->base = ctxt->input->cur;
12748 pinput->cur = ctxt->input->cur;
12749 pinput->free = NULL;
12750
12751 /*
12752 * let's parse that entity knowing it's an external subset.
12753 */
12754 ctxt->inSubset = 2;
12755 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12756 if (ctxt->myDoc == NULL) {
12757 xmlErrMemory(ctxt, "New Doc failed");
12758 return(NULL);
12759 }
12760 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12761 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12762 BAD_CAST "none", BAD_CAST "none");
12763
12764 if ((enc == XML_CHAR_ENCODING_NONE) &&
12765 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12766 /*
12767 * Get the 4 first bytes and decode the charset
12768 * if enc != XML_CHAR_ENCODING_NONE
12769 * plug some encoding conversion routines.
12770 */
12771 start[0] = RAW;
12772 start[1] = NXT(1);
12773 start[2] = NXT(2);
12774 start[3] = NXT(3);
12775 enc = xmlDetectCharEncoding(start, 4);
12776 if (enc != XML_CHAR_ENCODING_NONE) {
12777 xmlSwitchEncoding(ctxt, enc);
12778 }
12779 }
12780
12781 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12782
12783 if (ctxt->myDoc != NULL) {
12784 if (ctxt->wellFormed) {
12785 ret = ctxt->myDoc->extSubset;
12786 ctxt->myDoc->extSubset = NULL;
12787 if (ret != NULL) {
12788 xmlNodePtr tmp;
12789
12790 ret->doc = NULL;
12791 tmp = ret->children;
12792 while (tmp != NULL) {
12793 tmp->doc = NULL;
12794 tmp = tmp->next;
12795 }
12796 }
12797 } else {
12798 ret = NULL;
12799 }
12800 xmlFreeDoc(ctxt->myDoc);
12801 ctxt->myDoc = NULL;
12802 }
12803 if (sax != NULL) ctxt->sax = NULL;
12804 xmlFreeParserCtxt(ctxt);
12805
12806 return(ret);
12807 }
12808
12809 /**
12810 * xmlSAXParseDTD:
12811 * @sax: the SAX handler block
12812 * @ExternalID: a NAME* containing the External ID of the DTD
12813 * @SystemID: a NAME* containing the URL to the DTD
12814 *
12815 * Load and parse an external subset.
12816 *
12817 * Returns the resulting xmlDtdPtr or NULL in case of error.
12818 */
12819
12820 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12821 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12822 const xmlChar *SystemID) {
12823 xmlDtdPtr ret = NULL;
12824 xmlParserCtxtPtr ctxt;
12825 xmlParserInputPtr input = NULL;
12826 xmlCharEncoding enc;
12827 xmlChar* systemIdCanonic;
12828
12829 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12830
12831 ctxt = xmlNewParserCtxt();
12832 if (ctxt == NULL) {
12833 return(NULL);
12834 }
12835
12836 /* We are loading a DTD */
12837 ctxt->options |= XML_PARSE_DTDLOAD;
12838
12839 /*
12840 * Set-up the SAX context
12841 */
12842 if (sax != NULL) {
12843 if (ctxt->sax != NULL)
12844 xmlFree(ctxt->sax);
12845 ctxt->sax = sax;
12846 ctxt->userData = ctxt;
12847 }
12848
12849 /*
12850 * Canonicalise the system ID
12851 */
12852 systemIdCanonic = xmlCanonicPath(SystemID);
12853 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12854 xmlFreeParserCtxt(ctxt);
12855 return(NULL);
12856 }
12857
12858 /*
12859 * Ask the Entity resolver to load the damn thing
12860 */
12861
12862 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12863 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12864 systemIdCanonic);
12865 if (input == NULL) {
12866 if (sax != NULL) ctxt->sax = NULL;
12867 xmlFreeParserCtxt(ctxt);
12868 if (systemIdCanonic != NULL)
12869 xmlFree(systemIdCanonic);
12870 return(NULL);
12871 }
12872
12873 /*
12874 * plug some encoding conversion routines here.
12875 */
12876 if (xmlPushInput(ctxt, input) < 0) {
12877 if (sax != NULL) ctxt->sax = NULL;
12878 xmlFreeParserCtxt(ctxt);
12879 if (systemIdCanonic != NULL)
12880 xmlFree(systemIdCanonic);
12881 return(NULL);
12882 }
12883 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12884 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12885 xmlSwitchEncoding(ctxt, enc);
12886 }
12887
12888 if (input->filename == NULL)
12889 input->filename = (char *) systemIdCanonic;
12890 else
12891 xmlFree(systemIdCanonic);
12892 input->line = 1;
12893 input->col = 1;
12894 input->base = ctxt->input->cur;
12895 input->cur = ctxt->input->cur;
12896 input->free = NULL;
12897
12898 /*
12899 * let's parse that entity knowing it's an external subset.
12900 */
12901 ctxt->inSubset = 2;
12902 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12903 if (ctxt->myDoc == NULL) {
12904 xmlErrMemory(ctxt, "New Doc failed");
12905 if (sax != NULL) ctxt->sax = NULL;
12906 xmlFreeParserCtxt(ctxt);
12907 return(NULL);
12908 }
12909 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12910 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12911 ExternalID, SystemID);
12912 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12913
12914 if (ctxt->myDoc != NULL) {
12915 if (ctxt->wellFormed) {
12916 ret = ctxt->myDoc->extSubset;
12917 ctxt->myDoc->extSubset = NULL;
12918 if (ret != NULL) {
12919 xmlNodePtr tmp;
12920
12921 ret->doc = NULL;
12922 tmp = ret->children;
12923 while (tmp != NULL) {
12924 tmp->doc = NULL;
12925 tmp = tmp->next;
12926 }
12927 }
12928 } else {
12929 ret = NULL;
12930 }
12931 xmlFreeDoc(ctxt->myDoc);
12932 ctxt->myDoc = NULL;
12933 }
12934 if (sax != NULL) ctxt->sax = NULL;
12935 xmlFreeParserCtxt(ctxt);
12936
12937 return(ret);
12938 }
12939
12940
12941 /**
12942 * xmlParseDTD:
12943 * @ExternalID: a NAME* containing the External ID of the DTD
12944 * @SystemID: a NAME* containing the URL to the DTD
12945 *
12946 * Load and parse an external subset.
12947 *
12948 * Returns the resulting xmlDtdPtr or NULL in case of error.
12949 */
12950
12951 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12952 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12953 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12954 }
12955 #endif /* LIBXML_VALID_ENABLED */
12956
12957 /************************************************************************
12958 * *
12959 * Front ends when parsing an Entity *
12960 * *
12961 ************************************************************************/
12962
12963 /**
12964 * xmlParseCtxtExternalEntity:
12965 * @ctx: the existing parsing context
12966 * @URL: the URL for the entity to load
12967 * @ID: the System ID for the entity to load
12968 * @lst: the return value for the set of parsed nodes
12969 *
12970 * Parse an external general entity within an existing parsing context
12971 * An external general parsed entity is well-formed if it matches the
12972 * production labeled extParsedEnt.
12973 *
12974 * [78] extParsedEnt ::= TextDecl? content
12975 *
12976 * Returns 0 if the entity is well formed, -1 in case of args problem and
12977 * the parser error code otherwise
12978 */
12979
12980 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12981 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12982 const xmlChar *ID, xmlNodePtr *lst) {
12983 void *userData;
12984
12985 if (ctx == NULL) return(-1);
12986 /*
12987 * If the user provided their own SAX callbacks, then reuse the
12988 * userData callback field, otherwise the expected setup in a
12989 * DOM builder is to have userData == ctxt
12990 */
12991 if (ctx->userData == ctx)
12992 userData = NULL;
12993 else
12994 userData = ctx->userData;
12995 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12996 userData, ctx->depth + 1,
12997 URL, ID, lst);
12998 }
12999
13000 /**
13001 * xmlParseExternalEntityPrivate:
13002 * @doc: the document the chunk pertains to
13003 * @oldctxt: the previous parser context if available
13004 * @sax: the SAX handler block (possibly NULL)
13005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13006 * @depth: Used for loop detection, use 0
13007 * @URL: the URL for the entity to load
13008 * @ID: the System ID for the entity to load
13009 * @list: the return value for the set of parsed nodes
13010 *
13011 * Private version of xmlParseExternalEntity()
13012 *
13013 * Returns 0 if the entity is well formed, -1 in case of args problem and
13014 * the parser error code otherwise
13015 */
13016
13017 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13018 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13019 xmlSAXHandlerPtr sax,
13020 void *user_data, int depth, const xmlChar *URL,
13021 const xmlChar *ID, xmlNodePtr *list) {
13022 xmlParserCtxtPtr ctxt;
13023 xmlDocPtr newDoc;
13024 xmlNodePtr newRoot;
13025 xmlSAXHandlerPtr oldsax = NULL;
13026 xmlParserErrors ret = XML_ERR_OK;
13027 xmlChar start[4];
13028 xmlCharEncoding enc;
13029
13030 if (((depth > 40) &&
13031 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13032 (depth > 1024)) {
13033 return(XML_ERR_ENTITY_LOOP);
13034 }
13035
13036 if (list != NULL)
13037 *list = NULL;
13038 if ((URL == NULL) && (ID == NULL))
13039 return(XML_ERR_INTERNAL_ERROR);
13040 if (doc == NULL)
13041 return(XML_ERR_INTERNAL_ERROR);
13042
13043
13044 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13045 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13046 ctxt->userData = ctxt;
13047 if (sax != NULL) {
13048 oldsax = ctxt->sax;
13049 ctxt->sax = sax;
13050 if (user_data != NULL)
13051 ctxt->userData = user_data;
13052 }
13053 xmlDetectSAX2(ctxt);
13054 newDoc = xmlNewDoc(BAD_CAST "1.0");
13055 if (newDoc == NULL) {
13056 xmlFreeParserCtxt(ctxt);
13057 return(XML_ERR_INTERNAL_ERROR);
13058 }
13059 newDoc->properties = XML_DOC_INTERNAL;
13060 if (doc) {
13061 newDoc->intSubset = doc->intSubset;
13062 newDoc->extSubset = doc->extSubset;
13063 if (doc->dict) {
13064 newDoc->dict = doc->dict;
13065 xmlDictReference(newDoc->dict);
13066 }
13067 if (doc->URL != NULL) {
13068 newDoc->URL = xmlStrdup(doc->URL);
13069 }
13070 }
13071 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13072 if (newRoot == NULL) {
13073 if (sax != NULL)
13074 ctxt->sax = oldsax;
13075 xmlFreeParserCtxt(ctxt);
13076 newDoc->intSubset = NULL;
13077 newDoc->extSubset = NULL;
13078 xmlFreeDoc(newDoc);
13079 return(XML_ERR_INTERNAL_ERROR);
13080 }
13081 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13082 nodePush(ctxt, newDoc->children);
13083 if (doc == NULL) {
13084 ctxt->myDoc = newDoc;
13085 } else {
13086 ctxt->myDoc = doc;
13087 newRoot->doc = doc;
13088 }
13089
13090 /*
13091 * Get the 4 first bytes and decode the charset
13092 * if enc != XML_CHAR_ENCODING_NONE
13093 * plug some encoding conversion routines.
13094 */
13095 GROW;
13096 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13097 start[0] = RAW;
13098 start[1] = NXT(1);
13099 start[2] = NXT(2);
13100 start[3] = NXT(3);
13101 enc = xmlDetectCharEncoding(start, 4);
13102 if (enc != XML_CHAR_ENCODING_NONE) {
13103 xmlSwitchEncoding(ctxt, enc);
13104 }
13105 }
13106
13107 /*
13108 * Parse a possible text declaration first
13109 */
13110 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13111 xmlParseTextDecl(ctxt);
13112 /*
13113 * An XML-1.0 document can't reference an entity not XML-1.0
13114 */
13115 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13116 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13117 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13118 "Version mismatch between document and entity\n");
13119 }
13120 }
13121
13122 ctxt->instate = XML_PARSER_CONTENT;
13123 ctxt->depth = depth;
13124 if (oldctxt != NULL) {
13125 ctxt->_private = oldctxt->_private;
13126 ctxt->loadsubset = oldctxt->loadsubset;
13127 ctxt->validate = oldctxt->validate;
13128 ctxt->valid = oldctxt->valid;
13129 ctxt->replaceEntities = oldctxt->replaceEntities;
13130 if (oldctxt->validate) {
13131 ctxt->vctxt.error = oldctxt->vctxt.error;
13132 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13133 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13134 }
13135 ctxt->external = oldctxt->external;
13136 if (ctxt->dict) xmlDictFree(ctxt->dict);
13137 ctxt->dict = oldctxt->dict;
13138 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13139 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13140 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13141 ctxt->dictNames = oldctxt->dictNames;
13142 ctxt->attsDefault = oldctxt->attsDefault;
13143 ctxt->attsSpecial = oldctxt->attsSpecial;
13144 ctxt->linenumbers = oldctxt->linenumbers;
13145 ctxt->record_info = oldctxt->record_info;
13146 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13147 ctxt->node_seq.length = oldctxt->node_seq.length;
13148 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13149 } else {
13150 /*
13151 * Doing validity checking on chunk without context
13152 * doesn't make sense
13153 */
13154 ctxt->_private = NULL;
13155 ctxt->validate = 0;
13156 ctxt->external = 2;
13157 ctxt->loadsubset = 0;
13158 }
13159
13160 xmlParseContent(ctxt);
13161
13162 if ((RAW == '<') && (NXT(1) == '/')) {
13163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13164 } else if (RAW != 0) {
13165 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13166 }
13167 if (ctxt->node != newDoc->children) {
13168 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13169 }
13170
13171 if (!ctxt->wellFormed) {
13172 if (ctxt->errNo == 0)
13173 ret = XML_ERR_INTERNAL_ERROR;
13174 else
13175 ret = (xmlParserErrors)ctxt->errNo;
13176 } else {
13177 if (list != NULL) {
13178 xmlNodePtr cur;
13179
13180 /*
13181 * Return the newly created nodeset after unlinking it from
13182 * they pseudo parent.
13183 */
13184 cur = newDoc->children->children;
13185 *list = cur;
13186 while (cur != NULL) {
13187 cur->parent = NULL;
13188 cur = cur->next;
13189 }
13190 newDoc->children->children = NULL;
13191 }
13192 ret = XML_ERR_OK;
13193 }
13194
13195 /*
13196 * Record in the parent context the number of entities replacement
13197 * done when parsing that reference.
13198 */
13199 if (oldctxt != NULL)
13200 oldctxt->nbentities += ctxt->nbentities;
13201
13202 /*
13203 * Also record the size of the entity parsed
13204 */
13205 if (ctxt->input != NULL && oldctxt != NULL) {
13206 oldctxt->sizeentities += ctxt->input->consumed;
13207 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13208 }
13209 /*
13210 * And record the last error if any
13211 */
13212 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13213 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13214
13215 if (sax != NULL)
13216 ctxt->sax = oldsax;
13217 if (oldctxt != NULL) {
13218 ctxt->dict = NULL;
13219 ctxt->attsDefault = NULL;
13220 ctxt->attsSpecial = NULL;
13221 oldctxt->validate = ctxt->validate;
13222 oldctxt->valid = ctxt->valid;
13223 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13224 oldctxt->node_seq.length = ctxt->node_seq.length;
13225 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13226 }
13227 ctxt->node_seq.maximum = 0;
13228 ctxt->node_seq.length = 0;
13229 ctxt->node_seq.buffer = NULL;
13230 xmlFreeParserCtxt(ctxt);
13231 newDoc->intSubset = NULL;
13232 newDoc->extSubset = NULL;
13233 xmlFreeDoc(newDoc);
13234
13235 return(ret);
13236 }
13237
13238 #ifdef LIBXML_SAX1_ENABLED
13239 /**
13240 * xmlParseExternalEntity:
13241 * @doc: the document the chunk pertains to
13242 * @sax: the SAX handler block (possibly NULL)
13243 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13244 * @depth: Used for loop detection, use 0
13245 * @URL: the URL for the entity to load
13246 * @ID: the System ID for the entity to load
13247 * @lst: the return value for the set of parsed nodes
13248 *
13249 * Parse an external general entity
13250 * An external general parsed entity is well-formed if it matches the
13251 * production labeled extParsedEnt.
13252 *
13253 * [78] extParsedEnt ::= TextDecl? content
13254 *
13255 * Returns 0 if the entity is well formed, -1 in case of args problem and
13256 * the parser error code otherwise
13257 */
13258
13259 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13260 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13261 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13262 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13263 ID, lst));
13264 }
13265
13266 /**
13267 * xmlParseBalancedChunkMemory:
13268 * @doc: the document the chunk pertains to (must not be NULL)
13269 * @sax: the SAX handler block (possibly NULL)
13270 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13271 * @depth: Used for loop detection, use 0
13272 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13273 * @lst: the return value for the set of parsed nodes
13274 *
13275 * Parse a well-balanced chunk of an XML document
13276 * called by the parser
13277 * The allowed sequence for the Well Balanced Chunk is the one defined by
13278 * the content production in the XML grammar:
13279 *
13280 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13281 *
13282 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13283 * the parser error code otherwise
13284 */
13285
13286 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13287 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13288 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13289 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13290 depth, string, lst, 0 );
13291 }
13292 #endif /* LIBXML_SAX1_ENABLED */
13293
13294 /**
13295 * xmlParseBalancedChunkMemoryInternal:
13296 * @oldctxt: the existing parsing context
13297 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13298 * @user_data: the user data field for the parser context
13299 * @lst: the return value for the set of parsed nodes
13300 *
13301 *
13302 * Parse a well-balanced chunk of an XML document
13303 * called by the parser
13304 * The allowed sequence for the Well Balanced Chunk is the one defined by
13305 * the content production in the XML grammar:
13306 *
13307 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13308 *
13309 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13310 * error code otherwise
13311 *
13312 * In case recover is set to 1, the nodelist will not be empty even if
13313 * the parsed chunk is not well balanced.
13314 */
13315 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)13316 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13317 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13318 xmlParserCtxtPtr ctxt;
13319 xmlDocPtr newDoc = NULL;
13320 xmlNodePtr newRoot;
13321 xmlSAXHandlerPtr oldsax = NULL;
13322 xmlNodePtr content = NULL;
13323 xmlNodePtr last = NULL;
13324 int size;
13325 xmlParserErrors ret = XML_ERR_OK;
13326 #ifdef SAX2
13327 int i;
13328 #endif
13329
13330 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13331 (oldctxt->depth > 1024)) {
13332 return(XML_ERR_ENTITY_LOOP);
13333 }
13334
13335
13336 if (lst != NULL)
13337 *lst = NULL;
13338 if (string == NULL)
13339 return(XML_ERR_INTERNAL_ERROR);
13340
13341 size = xmlStrlen(string);
13342
13343 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13344 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13345 if (user_data != NULL)
13346 ctxt->userData = user_data;
13347 else
13348 ctxt->userData = ctxt;
13349 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13350 ctxt->dict = oldctxt->dict;
13351 ctxt->input_id = oldctxt->input_id + 1;
13352 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13353 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13354 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13355
13356 #ifdef SAX2
13357 /* propagate namespaces down the entity */
13358 for (i = 0;i < oldctxt->nsNr;i += 2) {
13359 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13360 }
13361 #endif
13362
13363 oldsax = ctxt->sax;
13364 ctxt->sax = oldctxt->sax;
13365 xmlDetectSAX2(ctxt);
13366 ctxt->replaceEntities = oldctxt->replaceEntities;
13367 ctxt->options = oldctxt->options;
13368
13369 ctxt->_private = oldctxt->_private;
13370 if (oldctxt->myDoc == NULL) {
13371 newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 if (newDoc == NULL) {
13373 ctxt->sax = oldsax;
13374 ctxt->dict = NULL;
13375 xmlFreeParserCtxt(ctxt);
13376 return(XML_ERR_INTERNAL_ERROR);
13377 }
13378 newDoc->properties = XML_DOC_INTERNAL;
13379 newDoc->dict = ctxt->dict;
13380 xmlDictReference(newDoc->dict);
13381 ctxt->myDoc = newDoc;
13382 } else {
13383 ctxt->myDoc = oldctxt->myDoc;
13384 content = ctxt->myDoc->children;
13385 last = ctxt->myDoc->last;
13386 }
13387 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13388 if (newRoot == NULL) {
13389 ctxt->sax = oldsax;
13390 ctxt->dict = NULL;
13391 xmlFreeParserCtxt(ctxt);
13392 if (newDoc != NULL) {
13393 xmlFreeDoc(newDoc);
13394 }
13395 return(XML_ERR_INTERNAL_ERROR);
13396 }
13397 ctxt->myDoc->children = NULL;
13398 ctxt->myDoc->last = NULL;
13399 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13400 nodePush(ctxt, ctxt->myDoc->children);
13401 ctxt->instate = XML_PARSER_CONTENT;
13402 ctxt->depth = oldctxt->depth + 1;
13403
13404 ctxt->validate = 0;
13405 ctxt->loadsubset = oldctxt->loadsubset;
13406 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13407 /*
13408 * ID/IDREF registration will be done in xmlValidateElement below
13409 */
13410 ctxt->loadsubset |= XML_SKIP_IDS;
13411 }
13412 ctxt->dictNames = oldctxt->dictNames;
13413 ctxt->attsDefault = oldctxt->attsDefault;
13414 ctxt->attsSpecial = oldctxt->attsSpecial;
13415
13416 xmlParseContent(ctxt);
13417 if ((RAW == '<') && (NXT(1) == '/')) {
13418 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13419 } else if (RAW != 0) {
13420 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13421 }
13422 if (ctxt->node != ctxt->myDoc->children) {
13423 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13424 }
13425
13426 if (!ctxt->wellFormed) {
13427 if (ctxt->errNo == 0)
13428 ret = XML_ERR_INTERNAL_ERROR;
13429 else
13430 ret = (xmlParserErrors)ctxt->errNo;
13431 } else {
13432 ret = XML_ERR_OK;
13433 }
13434
13435 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13436 xmlNodePtr cur;
13437
13438 /*
13439 * Return the newly created nodeset after unlinking it from
13440 * they pseudo parent.
13441 */
13442 cur = ctxt->myDoc->children->children;
13443 *lst = cur;
13444 while (cur != NULL) {
13445 #ifdef LIBXML_VALID_ENABLED
13446 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13447 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13448 (cur->type == XML_ELEMENT_NODE)) {
13449 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13450 oldctxt->myDoc, cur);
13451 }
13452 #endif /* LIBXML_VALID_ENABLED */
13453 cur->parent = NULL;
13454 cur = cur->next;
13455 }
13456 ctxt->myDoc->children->children = NULL;
13457 }
13458 if (ctxt->myDoc != NULL) {
13459 xmlFreeNode(ctxt->myDoc->children);
13460 ctxt->myDoc->children = content;
13461 ctxt->myDoc->last = last;
13462 }
13463
13464 /*
13465 * Record in the parent context the number of entities replacement
13466 * done when parsing that reference.
13467 */
13468 if (oldctxt != NULL)
13469 oldctxt->nbentities += ctxt->nbentities;
13470
13471 /*
13472 * Also record the last error if any
13473 */
13474 if (ctxt->lastError.code != XML_ERR_OK)
13475 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13476
13477 ctxt->sax = oldsax;
13478 ctxt->dict = NULL;
13479 ctxt->attsDefault = NULL;
13480 ctxt->attsSpecial = NULL;
13481 xmlFreeParserCtxt(ctxt);
13482 if (newDoc != NULL) {
13483 xmlFreeDoc(newDoc);
13484 }
13485
13486 return(ret);
13487 }
13488
13489 /**
13490 * xmlParseInNodeContext:
13491 * @node: the context node
13492 * @data: the input string
13493 * @datalen: the input string length in bytes
13494 * @options: a combination of xmlParserOption
13495 * @lst: the return value for the set of parsed nodes
13496 *
13497 * Parse a well-balanced chunk of an XML document
13498 * within the context (DTD, namespaces, etc ...) of the given node.
13499 *
13500 * The allowed sequence for the data is a Well Balanced Chunk defined by
13501 * the content production in the XML grammar:
13502 *
13503 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13504 *
13505 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13506 * error code otherwise
13507 */
13508 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13509 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13510 int options, xmlNodePtr *lst) {
13511 #ifdef SAX2
13512 xmlParserCtxtPtr ctxt;
13513 xmlDocPtr doc = NULL;
13514 xmlNodePtr fake, cur;
13515 int nsnr = 0;
13516
13517 xmlParserErrors ret = XML_ERR_OK;
13518
13519 /*
13520 * check all input parameters, grab the document
13521 */
13522 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13523 return(XML_ERR_INTERNAL_ERROR);
13524 switch (node->type) {
13525 case XML_ELEMENT_NODE:
13526 case XML_ATTRIBUTE_NODE:
13527 case XML_TEXT_NODE:
13528 case XML_CDATA_SECTION_NODE:
13529 case XML_ENTITY_REF_NODE:
13530 case XML_PI_NODE:
13531 case XML_COMMENT_NODE:
13532 case XML_DOCUMENT_NODE:
13533 case XML_HTML_DOCUMENT_NODE:
13534 break;
13535 default:
13536 return(XML_ERR_INTERNAL_ERROR);
13537
13538 }
13539 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13540 (node->type != XML_DOCUMENT_NODE) &&
13541 (node->type != XML_HTML_DOCUMENT_NODE))
13542 node = node->parent;
13543 if (node == NULL)
13544 return(XML_ERR_INTERNAL_ERROR);
13545 if (node->type == XML_ELEMENT_NODE)
13546 doc = node->doc;
13547 else
13548 doc = (xmlDocPtr) node;
13549 if (doc == NULL)
13550 return(XML_ERR_INTERNAL_ERROR);
13551
13552 /*
13553 * allocate a context and set-up everything not related to the
13554 * node position in the tree
13555 */
13556 if (doc->type == XML_DOCUMENT_NODE)
13557 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13558 #ifdef LIBXML_HTML_ENABLED
13559 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13560 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13561 /*
13562 * When parsing in context, it makes no sense to add implied
13563 * elements like html/body/etc...
13564 */
13565 options |= HTML_PARSE_NOIMPLIED;
13566 }
13567 #endif
13568 else
13569 return(XML_ERR_INTERNAL_ERROR);
13570
13571 if (ctxt == NULL)
13572 return(XML_ERR_NO_MEMORY);
13573
13574 /*
13575 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13576 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13577 * we must wait until the last moment to free the original one.
13578 */
13579 if (doc->dict != NULL) {
13580 if (ctxt->dict != NULL)
13581 xmlDictFree(ctxt->dict);
13582 ctxt->dict = doc->dict;
13583 } else
13584 options |= XML_PARSE_NODICT;
13585
13586 if (doc->encoding != NULL) {
13587 xmlCharEncodingHandlerPtr hdlr;
13588
13589 if (ctxt->encoding != NULL)
13590 xmlFree((xmlChar *) ctxt->encoding);
13591 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13592
13593 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13594 if (hdlr != NULL) {
13595 xmlSwitchToEncoding(ctxt, hdlr);
13596 } else {
13597 return(XML_ERR_UNSUPPORTED_ENCODING);
13598 }
13599 }
13600
13601 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13602 xmlDetectSAX2(ctxt);
13603 ctxt->myDoc = doc;
13604 /* parsing in context, i.e. as within existing content */
13605 ctxt->input_id = 2;
13606 ctxt->instate = XML_PARSER_CONTENT;
13607
13608 fake = xmlNewComment(NULL);
13609 if (fake == NULL) {
13610 xmlFreeParserCtxt(ctxt);
13611 return(XML_ERR_NO_MEMORY);
13612 }
13613 xmlAddChild(node, fake);
13614
13615 if (node->type == XML_ELEMENT_NODE) {
13616 nodePush(ctxt, node);
13617 /*
13618 * initialize the SAX2 namespaces stack
13619 */
13620 cur = node;
13621 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13622 xmlNsPtr ns = cur->nsDef;
13623 const xmlChar *iprefix, *ihref;
13624
13625 while (ns != NULL) {
13626 if (ctxt->dict) {
13627 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13628 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13629 } else {
13630 iprefix = ns->prefix;
13631 ihref = ns->href;
13632 }
13633
13634 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13635 nsPush(ctxt, iprefix, ihref);
13636 nsnr++;
13637 }
13638 ns = ns->next;
13639 }
13640 cur = cur->parent;
13641 }
13642 }
13643
13644 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13645 /*
13646 * ID/IDREF registration will be done in xmlValidateElement below
13647 */
13648 ctxt->loadsubset |= XML_SKIP_IDS;
13649 }
13650
13651 #ifdef LIBXML_HTML_ENABLED
13652 if (doc->type == XML_HTML_DOCUMENT_NODE)
13653 __htmlParseContent(ctxt);
13654 else
13655 #endif
13656 xmlParseContent(ctxt);
13657
13658 nsPop(ctxt, nsnr);
13659 if ((RAW == '<') && (NXT(1) == '/')) {
13660 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13661 } else if (RAW != 0) {
13662 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13663 }
13664 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13665 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13666 ctxt->wellFormed = 0;
13667 }
13668
13669 if (!ctxt->wellFormed) {
13670 if (ctxt->errNo == 0)
13671 ret = XML_ERR_INTERNAL_ERROR;
13672 else
13673 ret = (xmlParserErrors)ctxt->errNo;
13674 } else {
13675 ret = XML_ERR_OK;
13676 }
13677
13678 /*
13679 * Return the newly created nodeset after unlinking it from
13680 * the pseudo sibling.
13681 */
13682
13683 cur = fake->next;
13684 fake->next = NULL;
13685 node->last = fake;
13686
13687 if (cur != NULL) {
13688 cur->prev = NULL;
13689 }
13690
13691 *lst = cur;
13692
13693 while (cur != NULL) {
13694 cur->parent = NULL;
13695 cur = cur->next;
13696 }
13697
13698 xmlUnlinkNode(fake);
13699 xmlFreeNode(fake);
13700
13701
13702 if (ret != XML_ERR_OK) {
13703 xmlFreeNodeList(*lst);
13704 *lst = NULL;
13705 }
13706
13707 if (doc->dict != NULL)
13708 ctxt->dict = NULL;
13709 xmlFreeParserCtxt(ctxt);
13710
13711 return(ret);
13712 #else /* !SAX2 */
13713 return(XML_ERR_INTERNAL_ERROR);
13714 #endif
13715 }
13716
13717 #ifdef LIBXML_SAX1_ENABLED
13718 /**
13719 * xmlParseBalancedChunkMemoryRecover:
13720 * @doc: the document the chunk pertains to (must not be NULL)
13721 * @sax: the SAX handler block (possibly NULL)
13722 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13723 * @depth: Used for loop detection, use 0
13724 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13725 * @lst: the return value for the set of parsed nodes
13726 * @recover: return nodes even if the data is broken (use 0)
13727 *
13728 *
13729 * Parse a well-balanced chunk of an XML document
13730 * called by the parser
13731 * The allowed sequence for the Well Balanced Chunk is the one defined by
13732 * the content production in the XML grammar:
13733 *
13734 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13735 *
13736 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13737 * the parser error code otherwise
13738 *
13739 * In case recover is set to 1, the nodelist will not be empty even if
13740 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13741 * some extent.
13742 */
13743 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13744 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13745 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13746 int recover) {
13747 xmlParserCtxtPtr ctxt;
13748 xmlDocPtr newDoc;
13749 xmlSAXHandlerPtr oldsax = NULL;
13750 xmlNodePtr content, newRoot;
13751 int size;
13752 int ret = 0;
13753
13754 if (depth > 40) {
13755 return(XML_ERR_ENTITY_LOOP);
13756 }
13757
13758
13759 if (lst != NULL)
13760 *lst = NULL;
13761 if (string == NULL)
13762 return(-1);
13763
13764 size = xmlStrlen(string);
13765
13766 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13767 if (ctxt == NULL) return(-1);
13768 ctxt->userData = ctxt;
13769 if (sax != NULL) {
13770 oldsax = ctxt->sax;
13771 ctxt->sax = sax;
13772 if (user_data != NULL)
13773 ctxt->userData = user_data;
13774 }
13775 newDoc = xmlNewDoc(BAD_CAST "1.0");
13776 if (newDoc == NULL) {
13777 xmlFreeParserCtxt(ctxt);
13778 return(-1);
13779 }
13780 newDoc->properties = XML_DOC_INTERNAL;
13781 if ((doc != NULL) && (doc->dict != NULL)) {
13782 xmlDictFree(ctxt->dict);
13783 ctxt->dict = doc->dict;
13784 xmlDictReference(ctxt->dict);
13785 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13786 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13787 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13788 ctxt->dictNames = 1;
13789 } else {
13790 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13791 }
13792 /* doc == NULL is only supported for historic reasons */
13793 if (doc != NULL) {
13794 newDoc->intSubset = doc->intSubset;
13795 newDoc->extSubset = doc->extSubset;
13796 }
13797 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13798 if (newRoot == NULL) {
13799 if (sax != NULL)
13800 ctxt->sax = oldsax;
13801 xmlFreeParserCtxt(ctxt);
13802 newDoc->intSubset = NULL;
13803 newDoc->extSubset = NULL;
13804 xmlFreeDoc(newDoc);
13805 return(-1);
13806 }
13807 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13808 nodePush(ctxt, newRoot);
13809 /* doc == NULL is only supported for historic reasons */
13810 if (doc == NULL) {
13811 ctxt->myDoc = newDoc;
13812 } else {
13813 ctxt->myDoc = newDoc;
13814 newDoc->children->doc = doc;
13815 /* Ensure that doc has XML spec namespace */
13816 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13817 newDoc->oldNs = doc->oldNs;
13818 }
13819 ctxt->instate = XML_PARSER_CONTENT;
13820 ctxt->input_id = 2;
13821 ctxt->depth = depth;
13822
13823 /*
13824 * Doing validity checking on chunk doesn't make sense
13825 */
13826 ctxt->validate = 0;
13827 ctxt->loadsubset = 0;
13828 xmlDetectSAX2(ctxt);
13829
13830 if ( doc != NULL ){
13831 content = doc->children;
13832 doc->children = NULL;
13833 xmlParseContent(ctxt);
13834 doc->children = content;
13835 }
13836 else {
13837 xmlParseContent(ctxt);
13838 }
13839 if ((RAW == '<') && (NXT(1) == '/')) {
13840 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13841 } else if (RAW != 0) {
13842 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13843 }
13844 if (ctxt->node != newDoc->children) {
13845 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13846 }
13847
13848 if (!ctxt->wellFormed) {
13849 if (ctxt->errNo == 0)
13850 ret = 1;
13851 else
13852 ret = ctxt->errNo;
13853 } else {
13854 ret = 0;
13855 }
13856
13857 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13858 xmlNodePtr cur;
13859
13860 /*
13861 * Return the newly created nodeset after unlinking it from
13862 * they pseudo parent.
13863 */
13864 cur = newDoc->children->children;
13865 *lst = cur;
13866 while (cur != NULL) {
13867 xmlSetTreeDoc(cur, doc);
13868 cur->parent = NULL;
13869 cur = cur->next;
13870 }
13871 newDoc->children->children = NULL;
13872 }
13873
13874 if (sax != NULL)
13875 ctxt->sax = oldsax;
13876 xmlFreeParserCtxt(ctxt);
13877 newDoc->intSubset = NULL;
13878 newDoc->extSubset = NULL;
13879 /* This leaks the namespace list if doc == NULL */
13880 newDoc->oldNs = NULL;
13881 xmlFreeDoc(newDoc);
13882
13883 return(ret);
13884 }
13885
13886 /**
13887 * xmlSAXParseEntity:
13888 * @sax: the SAX handler block
13889 * @filename: the filename
13890 *
13891 * parse an XML external entity out of context and build a tree.
13892 * It use the given SAX function block to handle the parsing callback.
13893 * If sax is NULL, fallback to the default DOM tree building routines.
13894 *
13895 * [78] extParsedEnt ::= TextDecl? content
13896 *
13897 * This correspond to a "Well Balanced" chunk
13898 *
13899 * Returns the resulting document tree
13900 */
13901
13902 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13903 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13904 xmlDocPtr ret;
13905 xmlParserCtxtPtr ctxt;
13906
13907 ctxt = xmlCreateFileParserCtxt(filename);
13908 if (ctxt == NULL) {
13909 return(NULL);
13910 }
13911 if (sax != NULL) {
13912 if (ctxt->sax != NULL)
13913 xmlFree(ctxt->sax);
13914 ctxt->sax = sax;
13915 ctxt->userData = NULL;
13916 }
13917
13918 xmlParseExtParsedEnt(ctxt);
13919
13920 if (ctxt->wellFormed)
13921 ret = ctxt->myDoc;
13922 else {
13923 ret = NULL;
13924 xmlFreeDoc(ctxt->myDoc);
13925 ctxt->myDoc = NULL;
13926 }
13927 if (sax != NULL)
13928 ctxt->sax = NULL;
13929 xmlFreeParserCtxt(ctxt);
13930
13931 return(ret);
13932 }
13933
13934 /**
13935 * xmlParseEntity:
13936 * @filename: the filename
13937 *
13938 * parse an XML external entity out of context and build a tree.
13939 *
13940 * [78] extParsedEnt ::= TextDecl? content
13941 *
13942 * This correspond to a "Well Balanced" chunk
13943 *
13944 * Returns the resulting document tree
13945 */
13946
13947 xmlDocPtr
xmlParseEntity(const char * filename)13948 xmlParseEntity(const char *filename) {
13949 return(xmlSAXParseEntity(NULL, filename));
13950 }
13951 #endif /* LIBXML_SAX1_ENABLED */
13952
13953 /**
13954 * xmlCreateEntityParserCtxtInternal:
13955 * @URL: the entity URL
13956 * @ID: the entity PUBLIC ID
13957 * @base: a possible base for the target URI
13958 * @pctx: parser context used to set options on new context
13959 *
13960 * Create a parser context for an external entity
13961 * Automatic support for ZLIB/Compress compressed document is provided
13962 * by default if found at compile-time.
13963 *
13964 * Returns the new parser context or NULL
13965 */
13966 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13967 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13968 const xmlChar *base, xmlParserCtxtPtr pctx) {
13969 xmlParserCtxtPtr ctxt;
13970 xmlParserInputPtr inputStream;
13971 char *directory = NULL;
13972 xmlChar *uri;
13973
13974 ctxt = xmlNewParserCtxt();
13975 if (ctxt == NULL) {
13976 return(NULL);
13977 }
13978
13979 if (pctx != NULL) {
13980 ctxt->options = pctx->options;
13981 ctxt->_private = pctx->_private;
13982 /*
13983 * this is a subparser of pctx, so the input_id should be
13984 * incremented to distinguish from main entity
13985 */
13986 ctxt->input_id = pctx->input_id + 1;
13987 }
13988
13989 /* Don't read from stdin. */
13990 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13991 URL = BAD_CAST "./-";
13992
13993 uri = xmlBuildURI(URL, base);
13994
13995 if (uri == NULL) {
13996 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13997 if (inputStream == NULL) {
13998 xmlFreeParserCtxt(ctxt);
13999 return(NULL);
14000 }
14001
14002 inputPush(ctxt, inputStream);
14003
14004 if ((ctxt->directory == NULL) && (directory == NULL))
14005 directory = xmlParserGetDirectory((char *)URL);
14006 if ((ctxt->directory == NULL) && (directory != NULL))
14007 ctxt->directory = directory;
14008 } else {
14009 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14010 if (inputStream == NULL) {
14011 xmlFree(uri);
14012 xmlFreeParserCtxt(ctxt);
14013 return(NULL);
14014 }
14015
14016 inputPush(ctxt, inputStream);
14017
14018 if ((ctxt->directory == NULL) && (directory == NULL))
14019 directory = xmlParserGetDirectory((char *)uri);
14020 if ((ctxt->directory == NULL) && (directory != NULL))
14021 ctxt->directory = directory;
14022 xmlFree(uri);
14023 }
14024 return(ctxt);
14025 }
14026
14027 /**
14028 * xmlCreateEntityParserCtxt:
14029 * @URL: the entity URL
14030 * @ID: the entity PUBLIC ID
14031 * @base: a possible base for the target URI
14032 *
14033 * Create a parser context for an external entity
14034 * Automatic support for ZLIB/Compress compressed document is provided
14035 * by default if found at compile-time.
14036 *
14037 * Returns the new parser context or NULL
14038 */
14039 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14040 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14041 const xmlChar *base) {
14042 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14043
14044 }
14045
14046 /************************************************************************
14047 * *
14048 * Front ends when parsing from a file *
14049 * *
14050 ************************************************************************/
14051
14052 /**
14053 * xmlCreateURLParserCtxt:
14054 * @filename: the filename or URL
14055 * @options: a combination of xmlParserOption
14056 *
14057 * Create a parser context for a file or URL content.
14058 * Automatic support for ZLIB/Compress compressed document is provided
14059 * by default if found at compile-time and for file accesses
14060 *
14061 * Returns the new parser context or NULL
14062 */
14063 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14064 xmlCreateURLParserCtxt(const char *filename, int options)
14065 {
14066 xmlParserCtxtPtr ctxt;
14067 xmlParserInputPtr inputStream;
14068 char *directory = NULL;
14069
14070 ctxt = xmlNewParserCtxt();
14071 if (ctxt == NULL) {
14072 xmlErrMemory(NULL, "cannot allocate parser context");
14073 return(NULL);
14074 }
14075
14076 if (options)
14077 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14078 ctxt->linenumbers = 1;
14079
14080 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14081 if (inputStream == NULL) {
14082 xmlFreeParserCtxt(ctxt);
14083 return(NULL);
14084 }
14085
14086 inputPush(ctxt, inputStream);
14087 if ((ctxt->directory == NULL) && (directory == NULL))
14088 directory = xmlParserGetDirectory(filename);
14089 if ((ctxt->directory == NULL) && (directory != NULL))
14090 ctxt->directory = directory;
14091
14092 return(ctxt);
14093 }
14094
14095 /**
14096 * xmlCreateFileParserCtxt:
14097 * @filename: the filename
14098 *
14099 * Create a parser context for a file content.
14100 * Automatic support for ZLIB/Compress compressed document is provided
14101 * by default if found at compile-time.
14102 *
14103 * Returns the new parser context or NULL
14104 */
14105 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14106 xmlCreateFileParserCtxt(const char *filename)
14107 {
14108 return(xmlCreateURLParserCtxt(filename, 0));
14109 }
14110
14111 #ifdef LIBXML_SAX1_ENABLED
14112 /**
14113 * xmlSAXParseFileWithData:
14114 * @sax: the SAX handler block
14115 * @filename: the filename
14116 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14117 * documents
14118 * @data: the userdata
14119 *
14120 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14121 * compressed document is provided by default if found at compile-time.
14122 * It use the given SAX function block to handle the parsing callback.
14123 * If sax is NULL, fallback to the default DOM tree building routines.
14124 *
14125 * User data (void *) is stored within the parser context in the
14126 * context's _private member, so it is available nearly everywhere in libxml
14127 *
14128 * Returns the resulting document tree
14129 */
14130
14131 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14132 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14133 int recovery, void *data) {
14134 xmlDocPtr ret;
14135 xmlParserCtxtPtr ctxt;
14136
14137 xmlInitParser();
14138
14139 ctxt = xmlCreateFileParserCtxt(filename);
14140 if (ctxt == NULL) {
14141 return(NULL);
14142 }
14143 if (sax != NULL) {
14144 if (ctxt->sax != NULL)
14145 xmlFree(ctxt->sax);
14146 ctxt->sax = sax;
14147 }
14148 xmlDetectSAX2(ctxt);
14149 if (data!=NULL) {
14150 ctxt->_private = data;
14151 }
14152
14153 if (ctxt->directory == NULL)
14154 ctxt->directory = xmlParserGetDirectory(filename);
14155
14156 ctxt->recovery = recovery;
14157
14158 xmlParseDocument(ctxt);
14159
14160 if ((ctxt->wellFormed) || recovery) {
14161 ret = ctxt->myDoc;
14162 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14163 if (ctxt->input->buf->compressed > 0)
14164 ret->compression = 9;
14165 else
14166 ret->compression = ctxt->input->buf->compressed;
14167 }
14168 }
14169 else {
14170 ret = NULL;
14171 xmlFreeDoc(ctxt->myDoc);
14172 ctxt->myDoc = NULL;
14173 }
14174 if (sax != NULL)
14175 ctxt->sax = NULL;
14176 xmlFreeParserCtxt(ctxt);
14177
14178 return(ret);
14179 }
14180
14181 /**
14182 * xmlSAXParseFile:
14183 * @sax: the SAX handler block
14184 * @filename: the filename
14185 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14186 * documents
14187 *
14188 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14189 * compressed document is provided by default if found at compile-time.
14190 * It use the given SAX function block to handle the parsing callback.
14191 * If sax is NULL, fallback to the default DOM tree building routines.
14192 *
14193 * Returns the resulting document tree
14194 */
14195
14196 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14197 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14198 int recovery) {
14199 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14200 }
14201
14202 /**
14203 * xmlRecoverDoc:
14204 * @cur: a pointer to an array of xmlChar
14205 *
14206 * parse an XML in-memory document and build a tree.
14207 * In the case the document is not Well Formed, a attempt to build a
14208 * tree is tried anyway
14209 *
14210 * Returns the resulting document tree or NULL in case of failure
14211 */
14212
14213 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14214 xmlRecoverDoc(const xmlChar *cur) {
14215 return(xmlSAXParseDoc(NULL, cur, 1));
14216 }
14217
14218 /**
14219 * xmlParseFile:
14220 * @filename: the filename
14221 *
14222 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14223 * compressed document is provided by default if found at compile-time.
14224 *
14225 * Returns the resulting document tree if the file was wellformed,
14226 * NULL otherwise.
14227 */
14228
14229 xmlDocPtr
xmlParseFile(const char * filename)14230 xmlParseFile(const char *filename) {
14231 return(xmlSAXParseFile(NULL, filename, 0));
14232 }
14233
14234 /**
14235 * xmlRecoverFile:
14236 * @filename: the filename
14237 *
14238 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14239 * compressed document is provided by default if found at compile-time.
14240 * In the case the document is not Well Formed, it attempts to build
14241 * a tree anyway
14242 *
14243 * Returns the resulting document tree or NULL in case of failure
14244 */
14245
14246 xmlDocPtr
xmlRecoverFile(const char * filename)14247 xmlRecoverFile(const char *filename) {
14248 return(xmlSAXParseFile(NULL, filename, 1));
14249 }
14250
14251
14252 /**
14253 * xmlSetupParserForBuffer:
14254 * @ctxt: an XML parser context
14255 * @buffer: a xmlChar * buffer
14256 * @filename: a file name
14257 *
14258 * Setup the parser context to parse a new buffer; Clears any prior
14259 * contents from the parser context. The buffer parameter must not be
14260 * NULL, but the filename parameter can be
14261 */
14262 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14263 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14264 const char* filename)
14265 {
14266 xmlParserInputPtr input;
14267
14268 if ((ctxt == NULL) || (buffer == NULL))
14269 return;
14270
14271 input = xmlNewInputStream(ctxt);
14272 if (input == NULL) {
14273 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14274 xmlClearParserCtxt(ctxt);
14275 return;
14276 }
14277
14278 xmlClearParserCtxt(ctxt);
14279 if (filename != NULL)
14280 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14281 input->base = buffer;
14282 input->cur = buffer;
14283 input->end = &buffer[xmlStrlen(buffer)];
14284 inputPush(ctxt, input);
14285 }
14286
14287 /**
14288 * xmlSAXUserParseFile:
14289 * @sax: a SAX handler
14290 * @user_data: The user data returned on SAX callbacks
14291 * @filename: a file name
14292 *
14293 * parse an XML file and call the given SAX handler routines.
14294 * Automatic support for ZLIB/Compress compressed document is provided
14295 *
14296 * Returns 0 in case of success or a error number otherwise
14297 */
14298 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14299 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14300 const char *filename) {
14301 int ret = 0;
14302 xmlParserCtxtPtr ctxt;
14303
14304 ctxt = xmlCreateFileParserCtxt(filename);
14305 if (ctxt == NULL) return -1;
14306 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14307 xmlFree(ctxt->sax);
14308 ctxt->sax = sax;
14309 xmlDetectSAX2(ctxt);
14310
14311 if (user_data != NULL)
14312 ctxt->userData = user_data;
14313
14314 xmlParseDocument(ctxt);
14315
14316 if (ctxt->wellFormed)
14317 ret = 0;
14318 else {
14319 if (ctxt->errNo != 0)
14320 ret = ctxt->errNo;
14321 else
14322 ret = -1;
14323 }
14324 if (sax != NULL)
14325 ctxt->sax = NULL;
14326 if (ctxt->myDoc != NULL) {
14327 xmlFreeDoc(ctxt->myDoc);
14328 ctxt->myDoc = NULL;
14329 }
14330 xmlFreeParserCtxt(ctxt);
14331
14332 return ret;
14333 }
14334 #endif /* LIBXML_SAX1_ENABLED */
14335
14336 /************************************************************************
14337 * *
14338 * Front ends when parsing from memory *
14339 * *
14340 ************************************************************************/
14341
14342 /**
14343 * xmlCreateMemoryParserCtxt:
14344 * @buffer: a pointer to a char array
14345 * @size: the size of the array
14346 *
14347 * Create a parser context for an XML in-memory document.
14348 *
14349 * Returns the new parser context or NULL
14350 */
14351 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14352 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14353 xmlParserCtxtPtr ctxt;
14354 xmlParserInputPtr input;
14355 xmlParserInputBufferPtr buf;
14356
14357 if (buffer == NULL)
14358 return(NULL);
14359 if (size <= 0)
14360 return(NULL);
14361
14362 ctxt = xmlNewParserCtxt();
14363 if (ctxt == NULL)
14364 return(NULL);
14365
14366 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14367 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14368 if (buf == NULL) {
14369 xmlFreeParserCtxt(ctxt);
14370 return(NULL);
14371 }
14372
14373 input = xmlNewInputStream(ctxt);
14374 if (input == NULL) {
14375 xmlFreeParserInputBuffer(buf);
14376 xmlFreeParserCtxt(ctxt);
14377 return(NULL);
14378 }
14379
14380 input->filename = NULL;
14381 input->buf = buf;
14382 xmlBufResetInput(input->buf->buffer, input);
14383
14384 inputPush(ctxt, input);
14385 return(ctxt);
14386 }
14387
14388 #ifdef LIBXML_SAX1_ENABLED
14389 /**
14390 * xmlSAXParseMemoryWithData:
14391 * @sax: the SAX handler block
14392 * @buffer: an pointer to a char array
14393 * @size: the size of the array
14394 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14395 * documents
14396 * @data: the userdata
14397 *
14398 * parse an XML in-memory block and use the given SAX function block
14399 * to handle the parsing callback. If sax is NULL, fallback to the default
14400 * DOM tree building routines.
14401 *
14402 * User data (void *) is stored within the parser context in the
14403 * context's _private member, so it is available nearly everywhere in libxml
14404 *
14405 * Returns the resulting document tree
14406 */
14407
14408 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14409 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14410 int size, int recovery, void *data) {
14411 xmlDocPtr ret;
14412 xmlParserCtxtPtr ctxt;
14413
14414 xmlInitParser();
14415
14416 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14417 if (ctxt == NULL) return(NULL);
14418 if (sax != NULL) {
14419 if (ctxt->sax != NULL)
14420 xmlFree(ctxt->sax);
14421 ctxt->sax = sax;
14422 }
14423 xmlDetectSAX2(ctxt);
14424 if (data!=NULL) {
14425 ctxt->_private=data;
14426 }
14427
14428 ctxt->recovery = recovery;
14429
14430 xmlParseDocument(ctxt);
14431
14432 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433 else {
14434 ret = NULL;
14435 xmlFreeDoc(ctxt->myDoc);
14436 ctxt->myDoc = NULL;
14437 }
14438 if (sax != NULL)
14439 ctxt->sax = NULL;
14440 xmlFreeParserCtxt(ctxt);
14441
14442 return(ret);
14443 }
14444
14445 /**
14446 * xmlSAXParseMemory:
14447 * @sax: the SAX handler block
14448 * @buffer: an pointer to a char array
14449 * @size: the size of the array
14450 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14451 * documents
14452 *
14453 * parse an XML in-memory block and use the given SAX function block
14454 * to handle the parsing callback. If sax is NULL, fallback to the default
14455 * DOM tree building routines.
14456 *
14457 * Returns the resulting document tree
14458 */
14459 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14460 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14461 int size, int recovery) {
14462 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14463 }
14464
14465 /**
14466 * xmlParseMemory:
14467 * @buffer: an pointer to a char array
14468 * @size: the size of the array
14469 *
14470 * parse an XML in-memory block and build a tree.
14471 *
14472 * Returns the resulting document tree
14473 */
14474
xmlParseMemory(const char * buffer,int size)14475 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14476 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14477 }
14478
14479 /**
14480 * xmlRecoverMemory:
14481 * @buffer: an pointer to a char array
14482 * @size: the size of the array
14483 *
14484 * parse an XML in-memory block and build a tree.
14485 * In the case the document is not Well Formed, an attempt to
14486 * build a tree is tried anyway
14487 *
14488 * Returns the resulting document tree or NULL in case of error
14489 */
14490
xmlRecoverMemory(const char * buffer,int size)14491 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14492 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14493 }
14494
14495 /**
14496 * xmlSAXUserParseMemory:
14497 * @sax: a SAX handler
14498 * @user_data: The user data returned on SAX callbacks
14499 * @buffer: an in-memory XML document input
14500 * @size: the length of the XML document in bytes
14501 *
14502 * A better SAX parsing routine.
14503 * parse an XML in-memory buffer and call the given SAX handler routines.
14504 *
14505 * Returns 0 in case of success or a error number otherwise
14506 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14507 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14508 const char *buffer, int size) {
14509 int ret = 0;
14510 xmlParserCtxtPtr ctxt;
14511
14512 xmlInitParser();
14513
14514 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14515 if (ctxt == NULL) return -1;
14516 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14517 xmlFree(ctxt->sax);
14518 ctxt->sax = sax;
14519 xmlDetectSAX2(ctxt);
14520
14521 if (user_data != NULL)
14522 ctxt->userData = user_data;
14523
14524 xmlParseDocument(ctxt);
14525
14526 if (ctxt->wellFormed)
14527 ret = 0;
14528 else {
14529 if (ctxt->errNo != 0)
14530 ret = ctxt->errNo;
14531 else
14532 ret = -1;
14533 }
14534 if (sax != NULL)
14535 ctxt->sax = NULL;
14536 if (ctxt->myDoc != NULL) {
14537 xmlFreeDoc(ctxt->myDoc);
14538 ctxt->myDoc = NULL;
14539 }
14540 xmlFreeParserCtxt(ctxt);
14541
14542 return ret;
14543 }
14544 #endif /* LIBXML_SAX1_ENABLED */
14545
14546 /**
14547 * xmlCreateDocParserCtxt:
14548 * @cur: a pointer to an array of xmlChar
14549 *
14550 * Creates a parser context for an XML in-memory document.
14551 *
14552 * Returns the new parser context or NULL
14553 */
14554 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14555 xmlCreateDocParserCtxt(const xmlChar *cur) {
14556 int len;
14557
14558 if (cur == NULL)
14559 return(NULL);
14560 len = xmlStrlen(cur);
14561 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14562 }
14563
14564 #ifdef LIBXML_SAX1_ENABLED
14565 /**
14566 * xmlSAXParseDoc:
14567 * @sax: the SAX handler block
14568 * @cur: a pointer to an array of xmlChar
14569 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14570 * documents
14571 *
14572 * parse an XML in-memory document and build a tree.
14573 * It use the given SAX function block to handle the parsing callback.
14574 * If sax is NULL, fallback to the default DOM tree building routines.
14575 *
14576 * Returns the resulting document tree
14577 */
14578
14579 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14580 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14581 xmlDocPtr ret;
14582 xmlParserCtxtPtr ctxt;
14583 xmlSAXHandlerPtr oldsax = NULL;
14584
14585 if (cur == NULL) return(NULL);
14586
14587
14588 ctxt = xmlCreateDocParserCtxt(cur);
14589 if (ctxt == NULL) return(NULL);
14590 if (sax != NULL) {
14591 oldsax = ctxt->sax;
14592 ctxt->sax = sax;
14593 ctxt->userData = NULL;
14594 }
14595 xmlDetectSAX2(ctxt);
14596
14597 xmlParseDocument(ctxt);
14598 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14599 else {
14600 ret = NULL;
14601 xmlFreeDoc(ctxt->myDoc);
14602 ctxt->myDoc = NULL;
14603 }
14604 if (sax != NULL)
14605 ctxt->sax = oldsax;
14606 xmlFreeParserCtxt(ctxt);
14607
14608 return(ret);
14609 }
14610
14611 /**
14612 * xmlParseDoc:
14613 * @cur: a pointer to an array of xmlChar
14614 *
14615 * parse an XML in-memory document and build a tree.
14616 *
14617 * Returns the resulting document tree
14618 */
14619
14620 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14621 xmlParseDoc(const xmlChar *cur) {
14622 return(xmlSAXParseDoc(NULL, cur, 0));
14623 }
14624 #endif /* LIBXML_SAX1_ENABLED */
14625
14626 #ifdef LIBXML_LEGACY_ENABLED
14627 /************************************************************************
14628 * *
14629 * Specific function to keep track of entities references *
14630 * and used by the XSLT debugger *
14631 * *
14632 ************************************************************************/
14633
14634 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14635
14636 /**
14637 * xmlAddEntityReference:
14638 * @ent : A valid entity
14639 * @firstNode : A valid first node for children of entity
14640 * @lastNode : A valid last node of children entity
14641 *
14642 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14643 */
14644 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14645 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14646 xmlNodePtr lastNode)
14647 {
14648 if (xmlEntityRefFunc != NULL) {
14649 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14650 }
14651 }
14652
14653
14654 /**
14655 * xmlSetEntityReferenceFunc:
14656 * @func: A valid function
14657 *
14658 * Set the function to call call back when a xml reference has been made
14659 */
14660 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14661 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14662 {
14663 xmlEntityRefFunc = func;
14664 }
14665 #endif /* LIBXML_LEGACY_ENABLED */
14666
14667 /************************************************************************
14668 * *
14669 * Miscellaneous *
14670 * *
14671 ************************************************************************/
14672
14673 #ifdef LIBXML_XPATH_ENABLED
14674 #include <libxml/xpath.h>
14675 #endif
14676
14677 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14678 static int xmlParserInitialized = 0;
14679
14680 /**
14681 * xmlInitParser:
14682 *
14683 * Initialization function for the XML parser.
14684 * This is not reentrant. Call once before processing in case of
14685 * use in multithreaded programs.
14686 */
14687
14688 void
xmlInitParser(void)14689 xmlInitParser(void) {
14690 if (xmlParserInitialized != 0)
14691 return;
14692
14693 #if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14694 if (xmlFree == free)
14695 atexit(xmlCleanupParser);
14696 #endif
14697
14698 #ifdef LIBXML_THREAD_ENABLED
14699 __xmlGlobalInitMutexLock();
14700 if (xmlParserInitialized == 0) {
14701 #endif
14702 xmlInitThreads();
14703 xmlInitGlobals();
14704 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14705 (xmlGenericError == NULL))
14706 initGenericErrorDefaultFunc(NULL);
14707 xmlInitMemory();
14708 xmlInitializeDict();
14709 xmlInitCharEncodingHandlers();
14710 xmlDefaultSAXHandlerInit();
14711 xmlRegisterDefaultInputCallbacks();
14712 #ifdef LIBXML_OUTPUT_ENABLED
14713 xmlRegisterDefaultOutputCallbacks();
14714 #endif /* LIBXML_OUTPUT_ENABLED */
14715 #ifdef LIBXML_HTML_ENABLED
14716 htmlInitAutoClose();
14717 htmlDefaultSAXHandlerInit();
14718 #endif
14719 #ifdef LIBXML_XPATH_ENABLED
14720 xmlXPathInit();
14721 #endif
14722 xmlParserInitialized = 1;
14723 #ifdef LIBXML_THREAD_ENABLED
14724 }
14725 __xmlGlobalInitMutexUnlock();
14726 #endif
14727 }
14728
14729 /**
14730 * xmlCleanupParser:
14731 *
14732 * This function name is somewhat misleading. It does not clean up
14733 * parser state, it cleans up memory allocated by the library itself.
14734 * It is a cleanup function for the XML library. It tries to reclaim all
14735 * related global memory allocated for the library processing.
14736 * It doesn't deallocate any document related memory. One should
14737 * call xmlCleanupParser() only when the process has finished using
14738 * the library and all XML/HTML documents built with it.
14739 * See also xmlInitParser() which has the opposite function of preparing
14740 * the library for operations.
14741 *
14742 * WARNING: if your application is multithreaded or has plugin support
14743 * calling this may crash the application if another thread or
14744 * a plugin is still using libxml2. It's sometimes very hard to
14745 * guess if libxml2 is in use in the application, some libraries
14746 * or plugins may use it without notice. In case of doubt abstain
14747 * from calling this function or do it just before calling exit()
14748 * to avoid leak reports from valgrind !
14749 */
14750
14751 void
xmlCleanupParser(void)14752 xmlCleanupParser(void) {
14753 if (!xmlParserInitialized)
14754 return;
14755
14756 xmlCleanupCharEncodingHandlers();
14757 #ifdef LIBXML_CATALOG_ENABLED
14758 xmlCatalogCleanup();
14759 #endif
14760 xmlDictCleanup();
14761 xmlCleanupInputCallbacks();
14762 #ifdef LIBXML_OUTPUT_ENABLED
14763 xmlCleanupOutputCallbacks();
14764 #endif
14765 #ifdef LIBXML_SCHEMAS_ENABLED
14766 xmlSchemaCleanupTypes();
14767 xmlRelaxNGCleanupTypes();
14768 #endif
14769 xmlResetLastError();
14770 xmlCleanupGlobals();
14771 xmlCleanupThreads(); /* must be last if called not from the main thread */
14772 xmlCleanupMemory();
14773 xmlParserInitialized = 0;
14774 }
14775
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook that runs xmlCleanupParser(), but only while the
 * default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14789
14790 /************************************************************************
14791 * *
14792 * New set (2.6.0) of simpler and more flexible APIs *
14793 * *
14794 ************************************************************************/
14795
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement: it needs a terminating semicolon at the call site
 * and cannot capture a following "else".
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14807
14808 /**
14809 * xmlCtxtReset:
14810 * @ctxt: an XML parser context
14811 *
14812 * Reset a parser context
14813 */
14814 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14815 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14816 {
14817 xmlParserInputPtr input;
14818 xmlDictPtr dict;
14819
14820 if (ctxt == NULL)
14821 return;
14822
14823 dict = ctxt->dict;
14824
14825 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14826 xmlFreeInputStream(input);
14827 }
14828 ctxt->inputNr = 0;
14829 ctxt->input = NULL;
14830
14831 ctxt->spaceNr = 0;
14832 if (ctxt->spaceTab != NULL) {
14833 ctxt->spaceTab[0] = -1;
14834 ctxt->space = &ctxt->spaceTab[0];
14835 } else {
14836 ctxt->space = NULL;
14837 }
14838
14839
14840 ctxt->nodeNr = 0;
14841 ctxt->node = NULL;
14842
14843 ctxt->nameNr = 0;
14844 ctxt->name = NULL;
14845
14846 DICT_FREE(ctxt->version);
14847 ctxt->version = NULL;
14848 DICT_FREE(ctxt->encoding);
14849 ctxt->encoding = NULL;
14850 DICT_FREE(ctxt->directory);
14851 ctxt->directory = NULL;
14852 DICT_FREE(ctxt->extSubURI);
14853 ctxt->extSubURI = NULL;
14854 DICT_FREE(ctxt->extSubSystem);
14855 ctxt->extSubSystem = NULL;
14856 if (ctxt->myDoc != NULL)
14857 xmlFreeDoc(ctxt->myDoc);
14858 ctxt->myDoc = NULL;
14859
14860 ctxt->standalone = -1;
14861 ctxt->hasExternalSubset = 0;
14862 ctxt->hasPErefs = 0;
14863 ctxt->html = 0;
14864 ctxt->external = 0;
14865 ctxt->instate = XML_PARSER_START;
14866 ctxt->token = 0;
14867
14868 ctxt->wellFormed = 1;
14869 ctxt->nsWellFormed = 1;
14870 ctxt->disableSAX = 0;
14871 ctxt->valid = 1;
14872 #if 0
14873 ctxt->vctxt.userData = ctxt;
14874 ctxt->vctxt.error = xmlParserValidityError;
14875 ctxt->vctxt.warning = xmlParserValidityWarning;
14876 #endif
14877 ctxt->record_info = 0;
14878 ctxt->checkIndex = 0;
14879 ctxt->inSubset = 0;
14880 ctxt->errNo = XML_ERR_OK;
14881 ctxt->depth = 0;
14882 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14883 ctxt->catalogs = NULL;
14884 ctxt->nbentities = 0;
14885 ctxt->sizeentities = 0;
14886 ctxt->sizeentcopy = 0;
14887 xmlInitNodeInfoSeq(&ctxt->node_seq);
14888
14889 if (ctxt->attsDefault != NULL) {
14890 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14891 ctxt->attsDefault = NULL;
14892 }
14893 if (ctxt->attsSpecial != NULL) {
14894 xmlHashFree(ctxt->attsSpecial, NULL);
14895 ctxt->attsSpecial = NULL;
14896 }
14897
14898 #ifdef LIBXML_CATALOG_ENABLED
14899 if (ctxt->catalogs != NULL)
14900 xmlCatalogFreeLocal(ctxt->catalogs);
14901 #endif
14902 if (ctxt->lastError.code != XML_ERR_OK)
14903 xmlResetError(&ctxt->lastError);
14904 }
14905
14906 /**
14907 * xmlCtxtResetPush:
14908 * @ctxt: an XML parser context
14909 * @chunk: a pointer to an array of chars
14910 * @size: number of chars in the array
14911 * @filename: an optional file name or URI
14912 * @encoding: the document encoding, or NULL
14913 *
14914 * Reset a push parser context
14915 *
14916 * Returns 0 in case of success and 1 in case of error
14917 */
14918 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14919 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14920 int size, const char *filename, const char *encoding)
14921 {
14922 xmlParserInputPtr inputStream;
14923 xmlParserInputBufferPtr buf;
14924 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14925
14926 if (ctxt == NULL)
14927 return(1);
14928
14929 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14930 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14931
14932 buf = xmlAllocParserInputBuffer(enc);
14933 if (buf == NULL)
14934 return(1);
14935
14936 if (ctxt == NULL) {
14937 xmlFreeParserInputBuffer(buf);
14938 return(1);
14939 }
14940
14941 xmlCtxtReset(ctxt);
14942
14943 if (filename == NULL) {
14944 ctxt->directory = NULL;
14945 } else {
14946 ctxt->directory = xmlParserGetDirectory(filename);
14947 }
14948
14949 inputStream = xmlNewInputStream(ctxt);
14950 if (inputStream == NULL) {
14951 xmlFreeParserInputBuffer(buf);
14952 return(1);
14953 }
14954
14955 if (filename == NULL)
14956 inputStream->filename = NULL;
14957 else
14958 inputStream->filename = (char *)
14959 xmlCanonicPath((const xmlChar *) filename);
14960 inputStream->buf = buf;
14961 xmlBufResetInput(buf->buffer, inputStream);
14962
14963 inputPush(ctxt, inputStream);
14964
14965 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14966 (ctxt->input->buf != NULL)) {
14967 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14968 size_t cur = ctxt->input->cur - ctxt->input->base;
14969
14970 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14971
14972 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14973 #ifdef DEBUG_PUSH
14974 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14975 #endif
14976 }
14977
14978 if (encoding != NULL) {
14979 xmlCharEncodingHandlerPtr hdlr;
14980
14981 if (ctxt->encoding != NULL)
14982 xmlFree((xmlChar *) ctxt->encoding);
14983 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14984
14985 hdlr = xmlFindCharEncodingHandler(encoding);
14986 if (hdlr != NULL) {
14987 xmlSwitchToEncoding(ctxt, hdlr);
14988 } else {
14989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14990 "Unsupported encoding %s\n", BAD_CAST encoding);
14991 }
14992 } else if (enc != XML_CHAR_ENCODING_NONE) {
14993 xmlSwitchEncoding(ctxt, enc);
14994 }
14995
14996 return(0);
14997 }
14998
14999
15000 /**
15001 * xmlCtxtUseOptionsInternal:
15002 * @ctxt: an XML parser context
15003 * @options: a combination of xmlParserOption
15004 * @encoding: the user provided encoding to use
15005 *
15006 * Applies the options to the parser context
15007 *
15008 * Returns 0 in case of success, the set of unknown or unimplemented options
15009 * in case of error.
15010 */
15011 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15012 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15013 {
15014 if (ctxt == NULL)
15015 return(-1);
15016 if (encoding != NULL) {
15017 if (ctxt->encoding != NULL)
15018 xmlFree((xmlChar *) ctxt->encoding);
15019 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15020 }
15021 if (options & XML_PARSE_RECOVER) {
15022 ctxt->recovery = 1;
15023 options -= XML_PARSE_RECOVER;
15024 ctxt->options |= XML_PARSE_RECOVER;
15025 } else
15026 ctxt->recovery = 0;
15027 if (options & XML_PARSE_DTDLOAD) {
15028 ctxt->loadsubset = XML_DETECT_IDS;
15029 options -= XML_PARSE_DTDLOAD;
15030 ctxt->options |= XML_PARSE_DTDLOAD;
15031 } else
15032 ctxt->loadsubset = 0;
15033 if (options & XML_PARSE_DTDATTR) {
15034 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15035 options -= XML_PARSE_DTDATTR;
15036 ctxt->options |= XML_PARSE_DTDATTR;
15037 }
15038 if (options & XML_PARSE_NOENT) {
15039 ctxt->replaceEntities = 1;
15040 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15041 options -= XML_PARSE_NOENT;
15042 ctxt->options |= XML_PARSE_NOENT;
15043 } else
15044 ctxt->replaceEntities = 0;
15045 if (options & XML_PARSE_PEDANTIC) {
15046 ctxt->pedantic = 1;
15047 options -= XML_PARSE_PEDANTIC;
15048 ctxt->options |= XML_PARSE_PEDANTIC;
15049 } else
15050 ctxt->pedantic = 0;
15051 if (options & XML_PARSE_NOBLANKS) {
15052 ctxt->keepBlanks = 0;
15053 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15054 options -= XML_PARSE_NOBLANKS;
15055 ctxt->options |= XML_PARSE_NOBLANKS;
15056 } else
15057 ctxt->keepBlanks = 1;
15058 if (options & XML_PARSE_DTDVALID) {
15059 ctxt->validate = 1;
15060 if (options & XML_PARSE_NOWARNING)
15061 ctxt->vctxt.warning = NULL;
15062 if (options & XML_PARSE_NOERROR)
15063 ctxt->vctxt.error = NULL;
15064 options -= XML_PARSE_DTDVALID;
15065 ctxt->options |= XML_PARSE_DTDVALID;
15066 } else
15067 ctxt->validate = 0;
15068 if (options & XML_PARSE_NOWARNING) {
15069 ctxt->sax->warning = NULL;
15070 options -= XML_PARSE_NOWARNING;
15071 }
15072 if (options & XML_PARSE_NOERROR) {
15073 ctxt->sax->error = NULL;
15074 ctxt->sax->fatalError = NULL;
15075 options -= XML_PARSE_NOERROR;
15076 }
15077 #ifdef LIBXML_SAX1_ENABLED
15078 if (options & XML_PARSE_SAX1) {
15079 ctxt->sax->startElement = xmlSAX2StartElement;
15080 ctxt->sax->endElement = xmlSAX2EndElement;
15081 ctxt->sax->startElementNs = NULL;
15082 ctxt->sax->endElementNs = NULL;
15083 ctxt->sax->initialized = 1;
15084 options -= XML_PARSE_SAX1;
15085 ctxt->options |= XML_PARSE_SAX1;
15086 }
15087 #endif /* LIBXML_SAX1_ENABLED */
15088 if (options & XML_PARSE_NODICT) {
15089 ctxt->dictNames = 0;
15090 options -= XML_PARSE_NODICT;
15091 ctxt->options |= XML_PARSE_NODICT;
15092 } else {
15093 ctxt->dictNames = 1;
15094 }
15095 if (options & XML_PARSE_NOCDATA) {
15096 ctxt->sax->cdataBlock = NULL;
15097 options -= XML_PARSE_NOCDATA;
15098 ctxt->options |= XML_PARSE_NOCDATA;
15099 }
15100 if (options & XML_PARSE_NSCLEAN) {
15101 ctxt->options |= XML_PARSE_NSCLEAN;
15102 options -= XML_PARSE_NSCLEAN;
15103 }
15104 if (options & XML_PARSE_NONET) {
15105 ctxt->options |= XML_PARSE_NONET;
15106 options -= XML_PARSE_NONET;
15107 }
15108 if (options & XML_PARSE_COMPACT) {
15109 ctxt->options |= XML_PARSE_COMPACT;
15110 options -= XML_PARSE_COMPACT;
15111 }
15112 if (options & XML_PARSE_OLD10) {
15113 ctxt->options |= XML_PARSE_OLD10;
15114 options -= XML_PARSE_OLD10;
15115 }
15116 if (options & XML_PARSE_NOBASEFIX) {
15117 ctxt->options |= XML_PARSE_NOBASEFIX;
15118 options -= XML_PARSE_NOBASEFIX;
15119 }
15120 if (options & XML_PARSE_HUGE) {
15121 ctxt->options |= XML_PARSE_HUGE;
15122 options -= XML_PARSE_HUGE;
15123 if (ctxt->dict != NULL)
15124 xmlDictSetLimit(ctxt->dict, 0);
15125 }
15126 if (options & XML_PARSE_OLDSAX) {
15127 ctxt->options |= XML_PARSE_OLDSAX;
15128 options -= XML_PARSE_OLDSAX;
15129 }
15130 if (options & XML_PARSE_IGNORE_ENC) {
15131 ctxt->options |= XML_PARSE_IGNORE_ENC;
15132 options -= XML_PARSE_IGNORE_ENC;
15133 }
15134 if (options & XML_PARSE_BIG_LINES) {
15135 ctxt->options |= XML_PARSE_BIG_LINES;
15136 options -= XML_PARSE_BIG_LINES;
15137 }
15138 ctxt->linenumbers = 1;
15139 return (options);
15140 }
15141
15142 /**
15143 * xmlCtxtUseOptions:
15144 * @ctxt: an XML parser context
15145 * @options: a combination of xmlParserOption
15146 *
15147 * Applies the options to the parser context
15148 *
15149 * Returns 0 in case of success, the set of unknown or unimplemented options
15150 * in case of error.
15151 */
15152 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15153 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15154 {
15155 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15156 }
15157
15158 /**
15159 * xmlDoRead:
15160 * @ctxt: an XML parser context
15161 * @URL: the base URL to use for the document
15162 * @encoding: the document encoding, or NULL
15163 * @options: a combination of xmlParserOption
15164 * @reuse: keep the context for reuse
15165 *
15166 * Common front-end for the xmlRead functions
15167 *
15168 * Returns the resulting document tree or NULL
15169 */
15170 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15171 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15172 int options, int reuse)
15173 {
15174 xmlDocPtr ret;
15175
15176 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15177 if (encoding != NULL) {
15178 xmlCharEncodingHandlerPtr hdlr;
15179
15180 hdlr = xmlFindCharEncodingHandler(encoding);
15181 if (hdlr != NULL)
15182 xmlSwitchToEncoding(ctxt, hdlr);
15183 }
15184 if ((URL != NULL) && (ctxt->input != NULL) &&
15185 (ctxt->input->filename == NULL))
15186 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15187 xmlParseDocument(ctxt);
15188 if ((ctxt->wellFormed) || ctxt->recovery)
15189 ret = ctxt->myDoc;
15190 else {
15191 ret = NULL;
15192 if (ctxt->myDoc != NULL) {
15193 xmlFreeDoc(ctxt->myDoc);
15194 }
15195 }
15196 ctxt->myDoc = NULL;
15197 if (!reuse) {
15198 xmlFreeParserCtxt(ctxt);
15199 }
15200
15201 return (ret);
15202 }
15203
15204 /**
15205 * xmlReadDoc:
15206 * @cur: a pointer to a zero terminated string
15207 * @URL: the base URL to use for the document
15208 * @encoding: the document encoding, or NULL
15209 * @options: a combination of xmlParserOption
15210 *
15211 * parse an XML in-memory document and build a tree.
15212 *
15213 * Returns the resulting document tree
15214 */
15215 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15216 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15217 {
15218 xmlParserCtxtPtr ctxt;
15219
15220 if (cur == NULL)
15221 return (NULL);
15222 xmlInitParser();
15223
15224 ctxt = xmlCreateDocParserCtxt(cur);
15225 if (ctxt == NULL)
15226 return (NULL);
15227 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15228 }
15229
15230 /**
15231 * xmlReadFile:
15232 * @filename: a file or URL
15233 * @encoding: the document encoding, or NULL
15234 * @options: a combination of xmlParserOption
15235 *
15236 * parse an XML file from the filesystem or the network.
15237 *
15238 * Returns the resulting document tree
15239 */
15240 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15241 xmlReadFile(const char *filename, const char *encoding, int options)
15242 {
15243 xmlParserCtxtPtr ctxt;
15244
15245 xmlInitParser();
15246 ctxt = xmlCreateURLParserCtxt(filename, options);
15247 if (ctxt == NULL)
15248 return (NULL);
15249 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15250 }
15251
15252 /**
15253 * xmlReadMemory:
15254 * @buffer: a pointer to a char array
15255 * @size: the size of the array
15256 * @URL: the base URL to use for the document
15257 * @encoding: the document encoding, or NULL
15258 * @options: a combination of xmlParserOption
15259 *
15260 * parse an XML in-memory document and build a tree.
15261 *
15262 * Returns the resulting document tree
15263 */
15264 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15265 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15266 {
15267 xmlParserCtxtPtr ctxt;
15268
15269 xmlInitParser();
15270 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15271 if (ctxt == NULL)
15272 return (NULL);
15273 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15274 }
15275
15276 /**
15277 * xmlReadFd:
15278 * @fd: an open file descriptor
15279 * @URL: the base URL to use for the document
15280 * @encoding: the document encoding, or NULL
15281 * @options: a combination of xmlParserOption
15282 *
15283 * parse an XML from a file descriptor and build a tree.
15284 * NOTE that the file descriptor will not be closed when the
15285 * reader is closed or reset.
15286 *
15287 * Returns the resulting document tree
15288 */
15289 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15290 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15291 {
15292 xmlParserCtxtPtr ctxt;
15293 xmlParserInputBufferPtr input;
15294 xmlParserInputPtr stream;
15295
15296 if (fd < 0)
15297 return (NULL);
15298 xmlInitParser();
15299
15300 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15301 if (input == NULL)
15302 return (NULL);
15303 input->closecallback = NULL;
15304 ctxt = xmlNewParserCtxt();
15305 if (ctxt == NULL) {
15306 xmlFreeParserInputBuffer(input);
15307 return (NULL);
15308 }
15309 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15310 if (stream == NULL) {
15311 xmlFreeParserInputBuffer(input);
15312 xmlFreeParserCtxt(ctxt);
15313 return (NULL);
15314 }
15315 inputPush(ctxt, stream);
15316 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15317 }
15318
15319 /**
15320 * xmlReadIO:
15321 * @ioread: an I/O read function
15322 * @ioclose: an I/O close function
15323 * @ioctx: an I/O handler
15324 * @URL: the base URL to use for the document
15325 * @encoding: the document encoding, or NULL
15326 * @options: a combination of xmlParserOption
15327 *
15328 * parse an XML document from I/O functions and source and build a tree.
15329 *
15330 * Returns the resulting document tree
15331 */
15332 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15333 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15334 void *ioctx, const char *URL, const char *encoding, int options)
15335 {
15336 xmlParserCtxtPtr ctxt;
15337 xmlParserInputBufferPtr input;
15338 xmlParserInputPtr stream;
15339
15340 if (ioread == NULL)
15341 return (NULL);
15342 xmlInitParser();
15343
15344 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15345 XML_CHAR_ENCODING_NONE);
15346 if (input == NULL) {
15347 if (ioclose != NULL)
15348 ioclose(ioctx);
15349 return (NULL);
15350 }
15351 ctxt = xmlNewParserCtxt();
15352 if (ctxt == NULL) {
15353 xmlFreeParserInputBuffer(input);
15354 return (NULL);
15355 }
15356 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15357 if (stream == NULL) {
15358 xmlFreeParserInputBuffer(input);
15359 xmlFreeParserCtxt(ctxt);
15360 return (NULL);
15361 }
15362 inputPush(ctxt, stream);
15363 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15364 }
15365
15366 /**
15367 * xmlCtxtReadDoc:
15368 * @ctxt: an XML parser context
15369 * @cur: a pointer to a zero terminated string
15370 * @URL: the base URL to use for the document
15371 * @encoding: the document encoding, or NULL
15372 * @options: a combination of xmlParserOption
15373 *
15374 * parse an XML in-memory document and build a tree.
15375 * This reuses the existing @ctxt parser context
15376 *
15377 * Returns the resulting document tree
15378 */
15379 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15380 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15381 const char *URL, const char *encoding, int options)
15382 {
15383 xmlParserInputPtr stream;
15384
15385 if (cur == NULL)
15386 return (NULL);
15387 if (ctxt == NULL)
15388 return (NULL);
15389 xmlInitParser();
15390
15391 xmlCtxtReset(ctxt);
15392
15393 stream = xmlNewStringInputStream(ctxt, cur);
15394 if (stream == NULL) {
15395 return (NULL);
15396 }
15397 inputPush(ctxt, stream);
15398 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15399 }
15400
15401 /**
15402 * xmlCtxtReadFile:
15403 * @ctxt: an XML parser context
15404 * @filename: a file or URL
15405 * @encoding: the document encoding, or NULL
15406 * @options: a combination of xmlParserOption
15407 *
15408 * parse an XML file from the filesystem or the network.
15409 * This reuses the existing @ctxt parser context
15410 *
15411 * Returns the resulting document tree
15412 */
15413 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15414 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15415 const char *encoding, int options)
15416 {
15417 xmlParserInputPtr stream;
15418
15419 if (filename == NULL)
15420 return (NULL);
15421 if (ctxt == NULL)
15422 return (NULL);
15423 xmlInitParser();
15424
15425 xmlCtxtReset(ctxt);
15426
15427 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15428 if (stream == NULL) {
15429 return (NULL);
15430 }
15431 inputPush(ctxt, stream);
15432 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15433 }
15434
15435 /**
15436 * xmlCtxtReadMemory:
15437 * @ctxt: an XML parser context
15438 * @buffer: a pointer to a char array
15439 * @size: the size of the array
15440 * @URL: the base URL to use for the document
15441 * @encoding: the document encoding, or NULL
15442 * @options: a combination of xmlParserOption
15443 *
15444 * parse an XML in-memory document and build a tree.
15445 * This reuses the existing @ctxt parser context
15446 *
15447 * Returns the resulting document tree
15448 */
15449 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15450 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15451 const char *URL, const char *encoding, int options)
15452 {
15453 xmlParserInputBufferPtr input;
15454 xmlParserInputPtr stream;
15455
15456 if (ctxt == NULL)
15457 return (NULL);
15458 if (buffer == NULL)
15459 return (NULL);
15460 xmlInitParser();
15461
15462 xmlCtxtReset(ctxt);
15463
15464 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15465 if (input == NULL) {
15466 return(NULL);
15467 }
15468
15469 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15470 if (stream == NULL) {
15471 xmlFreeParserInputBuffer(input);
15472 return(NULL);
15473 }
15474
15475 inputPush(ctxt, stream);
15476 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15477 }
15478
15479 /**
15480 * xmlCtxtReadFd:
15481 * @ctxt: an XML parser context
15482 * @fd: an open file descriptor
15483 * @URL: the base URL to use for the document
15484 * @encoding: the document encoding, or NULL
15485 * @options: a combination of xmlParserOption
15486 *
15487 * parse an XML from a file descriptor and build a tree.
15488 * This reuses the existing @ctxt parser context
15489 * NOTE that the file descriptor will not be closed when the
15490 * reader is closed or reset.
15491 *
15492 * Returns the resulting document tree
15493 */
15494 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15495 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15496 const char *URL, const char *encoding, int options)
15497 {
15498 xmlParserInputBufferPtr input;
15499 xmlParserInputPtr stream;
15500
15501 if (fd < 0)
15502 return (NULL);
15503 if (ctxt == NULL)
15504 return (NULL);
15505 xmlInitParser();
15506
15507 xmlCtxtReset(ctxt);
15508
15509
15510 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15511 if (input == NULL)
15512 return (NULL);
15513 input->closecallback = NULL;
15514 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15515 if (stream == NULL) {
15516 xmlFreeParserInputBuffer(input);
15517 return (NULL);
15518 }
15519 inputPush(ctxt, stream);
15520 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15521 }
15522
15523 /**
15524 * xmlCtxtReadIO:
15525 * @ctxt: an XML parser context
15526 * @ioread: an I/O read function
15527 * @ioclose: an I/O close function
15528 * @ioctx: an I/O handler
15529 * @URL: the base URL to use for the document
15530 * @encoding: the document encoding, or NULL
15531 * @options: a combination of xmlParserOption
15532 *
15533 * parse an XML document from I/O functions and source and build a tree.
15534 * This reuses the existing @ctxt parser context
15535 *
15536 * Returns the resulting document tree
15537 */
15538 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15539 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15540 xmlInputCloseCallback ioclose, void *ioctx,
15541 const char *URL,
15542 const char *encoding, int options)
15543 {
15544 xmlParserInputBufferPtr input;
15545 xmlParserInputPtr stream;
15546
15547 if (ioread == NULL)
15548 return (NULL);
15549 if (ctxt == NULL)
15550 return (NULL);
15551 xmlInitParser();
15552
15553 xmlCtxtReset(ctxt);
15554
15555 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15556 XML_CHAR_ENCODING_NONE);
15557 if (input == NULL) {
15558 if (ioclose != NULL)
15559 ioclose(ioctx);
15560 return (NULL);
15561 }
15562 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15563 if (stream == NULL) {
15564 xmlFreeParserInputBuffer(input);
15565 return (NULL);
15566 }
15567 inputPush(ctxt, stream);
15568 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15569 }
15570
15571