1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
/*
 * Record kept for a start tag. The code that fills and reads this
 * structure is not part of this section of the file, so the field notes
 * below are reviewer assumptions to be confirmed.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the element's namespace prefix -- TODO confirm */
    const xmlChar *URI;     /* presumably the element's namespace URI -- TODO confirm */
    int line;               /* presumably the input line of the start tag -- TODO confirm */
    int nsNr;               /* presumably a count of namespace entries -- TODO confirm */
};
96
/*
 * Forward declarations of file-local (static) routines that are used
 * before their definition.
 */

/* Raise a fatal parser error identified by @error, with optional @info. */
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);

static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
                                  const xmlChar *base, xmlParserCtxtPtr pctx);

static void xmlHaltParser(xmlParserCtxtPtr ctxt);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
112 /************************************************************************
113 * *
114 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
115 * *
116 ************************************************************************/
117
/* Not referenced in this part of the file. */
#define XML_MAX_HUGE_LENGTH 1000000000

/*
 * XML_PARSER_BIG_ENTITY: entity sizes below this value are accepted by
 * xmlParserEntityCheck() without any further ratio check.
 * XML_PARSER_LOT_ENTITY is not referenced in this part of the file.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
130
131 /*
132 * xmlParserEntityCheck
133 *
134 * Function to check non-linear entity expansion behaviour
135 * This is here to detect and stop exponential linear entity expansion
136 * This is not a limitation of the parser but a safety
137 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138 * parser option.
139 */
140 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)141 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
142 xmlEntityPtr ent, size_t replacement)
143 {
144 size_t consumed = 0;
145 int i;
146
147 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
148 return (0);
149 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
150 return (1);
151
152 /*
153 * This may look absurd but is needed to detect
154 * entities problems
155 */
156 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
157 (ent->content != NULL) && (ent->checked == 0) &&
158 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
159 unsigned long oldnbent = ctxt->nbentities, diff;
160 xmlChar *rep;
161
162 ent->checked = 1;
163
164 ++ctxt->depth;
165 rep = xmlStringDecodeEntities(ctxt, ent->content,
166 XML_SUBSTITUTE_REF, 0, 0, 0);
167 --ctxt->depth;
168 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
169 ent->content[0] = 0;
170 }
171
172 diff = ctxt->nbentities - oldnbent + 1;
173 if (diff > INT_MAX / 2)
174 diff = INT_MAX / 2;
175 ent->checked = diff * 2;
176 if (rep != NULL) {
177 if (xmlStrchr(rep, '<'))
178 ent->checked |= 1;
179 xmlFree(rep);
180 rep = NULL;
181 }
182 }
183
184 /*
185 * Prevent entity exponential check, not just replacement while
186 * parsing the DTD
187 * The check is potentially costly so do that only once in a thousand
188 */
189 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
190 (ctxt->nbentities % 1024 == 0)) {
191 for (i = 0;i < ctxt->inputNr;i++) {
192 consumed += ctxt->inputTab[i]->consumed +
193 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
194 }
195 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 ctxt->instate = XML_PARSER_EOF;
198 return (1);
199 }
200 consumed = 0;
201 }
202
203
204
205 if (replacement != 0) {
206 if (replacement < XML_MAX_TEXT_LENGTH)
207 return(0);
208
209 /*
210 * If the volume of entity copy reaches 10 times the
211 * amount of parsed data and over the large text threshold
212 * then that's very likely to be an abuse.
213 */
214 if (ctxt->input != NULL) {
215 consumed = ctxt->input->consumed +
216 (ctxt->input->cur - ctxt->input->base);
217 }
218 consumed += ctxt->sizeentities;
219
220 if (replacement < XML_PARSER_NON_LINEAR * consumed)
221 return(0);
222 } else if (size != 0) {
223 /*
224 * Do the check based on the replacement size of the entity
225 */
226 if (size < XML_PARSER_BIG_ENTITY)
227 return(0);
228
229 /*
230 * A limit on the amount of text data reasonably used
231 */
232 if (ctxt->input != NULL) {
233 consumed = ctxt->input->consumed +
234 (ctxt->input->cur - ctxt->input->base);
235 }
236 consumed += ctxt->sizeentities;
237
238 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
239 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
240 return (0);
241 } else if (ent != NULL) {
242 /*
243 * use the number of parsed entities in the replacement
244 */
245 size = ent->checked / 2;
246
247 /*
248 * The amount of data parsed counting entities size only once
249 */
250 if (ctxt->input != NULL) {
251 consumed = ctxt->input->consumed +
252 (ctxt->input->cur - ctxt->input->base);
253 }
254 consumed += ctxt->sizeentities;
255
256 /*
257 * Check the density of entities for the amount of data
258 * knowing an entity reference will take at least 3 bytes
259 */
260 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
261 return (0);
262 } else {
263 /*
264 * strange we got no data for checking
265 */
266 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
267 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
268 (ctxt->nbentities <= 10000))
269 return (0);
270 }
271 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
272 return (1);
273 }
274
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
284
285
286
/* Parser-wide constants; their users are outside this part of the file. */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
302
/*
 * List of "xml"-prefixed processing instruction targets allowed by the
 * W3C specifications. The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
312
313
/*
 * Forward declarations of file-local (static) routines that are used
 * before their definition.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                              const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
/* Only declared when legacy support is compiled in. */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                  const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
339
340 /************************************************************************
341 * *
342 * Some factorized error routines *
343 * *
344 ************************************************************************/
345
346 /**
347 * xmlErrAttributeDup:
348 * @ctxt: an XML parser context
349 * @prefix: the attribute prefix
350 * @localname: the attribute localname
351 *
352 * Handle a redefinition of attribute error
353 */
354 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)355 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
356 const xmlChar * localname)
357 {
358 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
359 (ctxt->instate == XML_PARSER_EOF))
360 return;
361 if (ctxt != NULL)
362 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
363
364 if (prefix == NULL)
365 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
366 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
367 (const char *) localname, NULL, NULL, 0, 0,
368 "Attribute %s redefined\n", localname);
369 else
370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
371 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
372 (const char *) prefix, (const char *) localname,
373 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
374 localname);
375 if (ctxt != NULL) {
376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379 }
380 }
381
382 /**
383 * xmlFatalErr:
384 * @ctxt: an XML parser context
385 * @error: the error number
386 * @extra: extra information string
387 *
388 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
389 */
390 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)391 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
392 {
393 const char *errmsg;
394
395 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396 (ctxt->instate == XML_PARSER_EOF))
397 return;
398 switch (error) {
399 case XML_ERR_INVALID_HEX_CHARREF:
400 errmsg = "CharRef: invalid hexadecimal value";
401 break;
402 case XML_ERR_INVALID_DEC_CHARREF:
403 errmsg = "CharRef: invalid decimal value";
404 break;
405 case XML_ERR_INVALID_CHARREF:
406 errmsg = "CharRef: invalid value";
407 break;
408 case XML_ERR_INTERNAL_ERROR:
409 errmsg = "internal error";
410 break;
411 case XML_ERR_PEREF_AT_EOF:
412 errmsg = "PEReference at end of document";
413 break;
414 case XML_ERR_PEREF_IN_PROLOG:
415 errmsg = "PEReference in prolog";
416 break;
417 case XML_ERR_PEREF_IN_EPILOG:
418 errmsg = "PEReference in epilog";
419 break;
420 case XML_ERR_PEREF_NO_NAME:
421 errmsg = "PEReference: no name";
422 break;
423 case XML_ERR_PEREF_SEMICOL_MISSING:
424 errmsg = "PEReference: expecting ';'";
425 break;
426 case XML_ERR_ENTITY_LOOP:
427 errmsg = "Detected an entity reference loop";
428 break;
429 case XML_ERR_ENTITY_NOT_STARTED:
430 errmsg = "EntityValue: \" or ' expected";
431 break;
432 case XML_ERR_ENTITY_PE_INTERNAL:
433 errmsg = "PEReferences forbidden in internal subset";
434 break;
435 case XML_ERR_ENTITY_NOT_FINISHED:
436 errmsg = "EntityValue: \" or ' expected";
437 break;
438 case XML_ERR_ATTRIBUTE_NOT_STARTED:
439 errmsg = "AttValue: \" or ' expected";
440 break;
441 case XML_ERR_LT_IN_ATTRIBUTE:
442 errmsg = "Unescaped '<' not allowed in attributes values";
443 break;
444 case XML_ERR_LITERAL_NOT_STARTED:
445 errmsg = "SystemLiteral \" or ' expected";
446 break;
447 case XML_ERR_LITERAL_NOT_FINISHED:
448 errmsg = "Unfinished System or Public ID \" or ' expected";
449 break;
450 case XML_ERR_MISPLACED_CDATA_END:
451 errmsg = "Sequence ']]>' not allowed in content";
452 break;
453 case XML_ERR_URI_REQUIRED:
454 errmsg = "SYSTEM or PUBLIC, the URI is missing";
455 break;
456 case XML_ERR_PUBID_REQUIRED:
457 errmsg = "PUBLIC, the Public Identifier is missing";
458 break;
459 case XML_ERR_HYPHEN_IN_COMMENT:
460 errmsg = "Comment must not contain '--' (double-hyphen)";
461 break;
462 case XML_ERR_PI_NOT_STARTED:
463 errmsg = "xmlParsePI : no target name";
464 break;
465 case XML_ERR_RESERVED_XML_NAME:
466 errmsg = "Invalid PI name";
467 break;
468 case XML_ERR_NOTATION_NOT_STARTED:
469 errmsg = "NOTATION: Name expected here";
470 break;
471 case XML_ERR_NOTATION_NOT_FINISHED:
472 errmsg = "'>' required to close NOTATION declaration";
473 break;
474 case XML_ERR_VALUE_REQUIRED:
475 errmsg = "Entity value required";
476 break;
477 case XML_ERR_URI_FRAGMENT:
478 errmsg = "Fragment not allowed";
479 break;
480 case XML_ERR_ATTLIST_NOT_STARTED:
481 errmsg = "'(' required to start ATTLIST enumeration";
482 break;
483 case XML_ERR_NMTOKEN_REQUIRED:
484 errmsg = "NmToken expected in ATTLIST enumeration";
485 break;
486 case XML_ERR_ATTLIST_NOT_FINISHED:
487 errmsg = "')' required to finish ATTLIST enumeration";
488 break;
489 case XML_ERR_MIXED_NOT_STARTED:
490 errmsg = "MixedContentDecl : '|' or ')*' expected";
491 break;
492 case XML_ERR_PCDATA_REQUIRED:
493 errmsg = "MixedContentDecl : '#PCDATA' expected";
494 break;
495 case XML_ERR_ELEMCONTENT_NOT_STARTED:
496 errmsg = "ContentDecl : Name or '(' expected";
497 break;
498 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
499 errmsg = "ContentDecl : ',' '|' or ')' expected";
500 break;
501 case XML_ERR_PEREF_IN_INT_SUBSET:
502 errmsg =
503 "PEReference: forbidden within markup decl in internal subset";
504 break;
505 case XML_ERR_GT_REQUIRED:
506 errmsg = "expected '>'";
507 break;
508 case XML_ERR_CONDSEC_INVALID:
509 errmsg = "XML conditional section '[' expected";
510 break;
511 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
512 errmsg = "Content error in the external subset";
513 break;
514 case XML_ERR_CONDSEC_INVALID_KEYWORD:
515 errmsg =
516 "conditional section INCLUDE or IGNORE keyword expected";
517 break;
518 case XML_ERR_CONDSEC_NOT_FINISHED:
519 errmsg = "XML conditional section not closed";
520 break;
521 case XML_ERR_XMLDECL_NOT_STARTED:
522 errmsg = "Text declaration '<?xml' required";
523 break;
524 case XML_ERR_XMLDECL_NOT_FINISHED:
525 errmsg = "parsing XML declaration: '?>' expected";
526 break;
527 case XML_ERR_EXT_ENTITY_STANDALONE:
528 errmsg = "external parsed entities cannot be standalone";
529 break;
530 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
531 errmsg = "EntityRef: expecting ';'";
532 break;
533 case XML_ERR_DOCTYPE_NOT_FINISHED:
534 errmsg = "DOCTYPE improperly terminated";
535 break;
536 case XML_ERR_LTSLASH_REQUIRED:
537 errmsg = "EndTag: '</' not found";
538 break;
539 case XML_ERR_EQUAL_REQUIRED:
540 errmsg = "expected '='";
541 break;
542 case XML_ERR_STRING_NOT_CLOSED:
543 errmsg = "String not closed expecting \" or '";
544 break;
545 case XML_ERR_STRING_NOT_STARTED:
546 errmsg = "String not started expecting ' or \"";
547 break;
548 case XML_ERR_ENCODING_NAME:
549 errmsg = "Invalid XML encoding name";
550 break;
551 case XML_ERR_STANDALONE_VALUE:
552 errmsg = "standalone accepts only 'yes' or 'no'";
553 break;
554 case XML_ERR_DOCUMENT_EMPTY:
555 errmsg = "Document is empty";
556 break;
557 case XML_ERR_DOCUMENT_END:
558 errmsg = "Extra content at the end of the document";
559 break;
560 case XML_ERR_NOT_WELL_BALANCED:
561 errmsg = "chunk is not well balanced";
562 break;
563 case XML_ERR_EXTRA_CONTENT:
564 errmsg = "extra content at the end of well balanced chunk";
565 break;
566 case XML_ERR_VERSION_MISSING:
567 errmsg = "Malformed declaration expecting version";
568 break;
569 case XML_ERR_NAME_TOO_LONG:
570 errmsg = "Name too long";
571 break;
572 #if 0
573 case:
574 errmsg = "";
575 break;
576 #endif
577 default:
578 errmsg = "Unregistered error message";
579 }
580 if (ctxt != NULL)
581 ctxt->errNo = error;
582 if (info == NULL) {
583 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
585 errmsg);
586 } else {
587 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
588 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
589 errmsg, info);
590 }
591 if (ctxt != NULL) {
592 ctxt->wellFormed = 0;
593 if (ctxt->recovery == 0)
594 ctxt->disableSAX = 1;
595 }
596 }
597
598 /**
599 * xmlFatalErrMsg:
600 * @ctxt: an XML parser context
601 * @error: the error number
602 * @msg: the error message
603 *
604 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
605 */
606 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)607 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
608 const char *msg)
609 {
610 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
611 (ctxt->instate == XML_PARSER_EOF))
612 return;
613 if (ctxt != NULL)
614 ctxt->errNo = error;
615 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
617 if (ctxt != NULL) {
618 ctxt->wellFormed = 0;
619 if (ctxt->recovery == 0)
620 ctxt->disableSAX = 1;
621 }
622 }
623
624 /**
625 * xmlWarningMsg:
626 * @ctxt: an XML parser context
627 * @error: the error number
628 * @msg: the error message
629 * @str1: extra data
630 * @str2: extra data
631 *
632 * Handle a warning.
633 */
634 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)635 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636 const char *msg, const xmlChar *str1, const xmlChar *str2)
637 {
638 xmlStructuredErrorFunc schannel = NULL;
639
640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
643 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
644 (ctxt->sax->initialized == XML_SAX2_MAGIC))
645 schannel = ctxt->sax->serror;
646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
648 (ctxt->sax) ? ctxt->sax->warning : NULL,
649 ctxt->userData,
650 ctxt, NULL, XML_FROM_PARSER, error,
651 XML_ERR_WARNING, NULL, 0,
652 (const char *) str1, (const char *) str2, NULL, 0, 0,
653 msg, (const char *) str1, (const char *) str2);
654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_PARSER, error,
657 XML_ERR_WARNING, NULL, 0,
658 (const char *) str1, (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
660 }
661 }
662
663 /**
664 * xmlValidityError:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @str1: extra data
669 *
670 * Handle a validity error.
671 */
672 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)673 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
674 const char *msg, const xmlChar *str1, const xmlChar *str2)
675 {
676 xmlStructuredErrorFunc schannel = NULL;
677
678 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679 (ctxt->instate == XML_PARSER_EOF))
680 return;
681 if (ctxt != NULL) {
682 ctxt->errNo = error;
683 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
684 schannel = ctxt->sax->serror;
685 }
686 if (ctxt != NULL) {
687 __xmlRaiseError(schannel,
688 ctxt->vctxt.error, ctxt->vctxt.userData,
689 ctxt, NULL, XML_FROM_DTD, error,
690 XML_ERR_ERROR, NULL, 0, (const char *) str1,
691 (const char *) str2, NULL, 0, 0,
692 msg, (const char *) str1, (const char *) str2);
693 ctxt->valid = 0;
694 } else {
695 __xmlRaiseError(schannel, NULL, NULL,
696 ctxt, NULL, XML_FROM_DTD, error,
697 XML_ERR_ERROR, NULL, 0, (const char *) str1,
698 (const char *) str2, NULL, 0, 0,
699 msg, (const char *) str1, (const char *) str2);
700 }
701 }
702
703 /**
704 * xmlFatalErrMsgInt:
705 * @ctxt: an XML parser context
706 * @error: the error number
707 * @msg: the error message
708 * @val: an integer value
709 *
710 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711 */
712 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)713 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714 const char *msg, int val)
715 {
716 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
717 (ctxt->instate == XML_PARSER_EOF))
718 return;
719 if (ctxt != NULL)
720 ctxt->errNo = error;
721 __xmlRaiseError(NULL, NULL, NULL,
722 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
723 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
724 if (ctxt != NULL) {
725 ctxt->wellFormed = 0;
726 if (ctxt->recovery == 0)
727 ctxt->disableSAX = 1;
728 }
729 }
730
731 /**
732 * xmlFatalErrMsgStrIntStr:
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the error message
736 * @str1: an string info
737 * @val: an integer value
738 * @str2: an string info
739 *
740 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
741 */
742 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)743 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
744 const char *msg, const xmlChar *str1, int val,
745 const xmlChar *str2)
746 {
747 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748 (ctxt->instate == XML_PARSER_EOF))
749 return;
750 if (ctxt != NULL)
751 ctxt->errNo = error;
752 __xmlRaiseError(NULL, NULL, NULL,
753 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
754 NULL, 0, (const char *) str1, (const char *) str2,
755 NULL, val, 0, msg, str1, val, str2);
756 if (ctxt != NULL) {
757 ctxt->wellFormed = 0;
758 if (ctxt->recovery == 0)
759 ctxt->disableSAX = 1;
760 }
761 }
762
763 /**
764 * xmlFatalErrMsgStr:
765 * @ctxt: an XML parser context
766 * @error: the error number
767 * @msg: the error message
768 * @val: a string value
769 *
770 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
771 */
772 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)773 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
774 const char *msg, const xmlChar * val)
775 {
776 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
777 (ctxt->instate == XML_PARSER_EOF))
778 return;
779 if (ctxt != NULL)
780 ctxt->errNo = error;
781 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
782 XML_FROM_PARSER, error, XML_ERR_FATAL,
783 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
784 val);
785 if (ctxt != NULL) {
786 ctxt->wellFormed = 0;
787 if (ctxt->recovery == 0)
788 ctxt->disableSAX = 1;
789 }
790 }
791
792 /**
793 * xmlErrMsgStr:
794 * @ctxt: an XML parser context
795 * @error: the error number
796 * @msg: the error message
797 * @val: a string value
798 *
799 * Handle a non fatal parser error
800 */
801 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)802 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
803 const char *msg, const xmlChar * val)
804 {
805 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
806 (ctxt->instate == XML_PARSER_EOF))
807 return;
808 if (ctxt != NULL)
809 ctxt->errNo = error;
810 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
811 XML_FROM_PARSER, error, XML_ERR_ERROR,
812 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
813 val);
814 }
815
816 /**
817 * xmlNsErr:
818 * @ctxt: an XML parser context
819 * @error: the error number
820 * @msg: the message
821 * @info1: extra information string
822 * @info2: extra information string
823 *
824 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
825 */
826 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)827 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
828 const char *msg,
829 const xmlChar * info1, const xmlChar * info2,
830 const xmlChar * info3)
831 {
832 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
833 (ctxt->instate == XML_PARSER_EOF))
834 return;
835 if (ctxt != NULL)
836 ctxt->errNo = error;
837 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
838 XML_ERR_ERROR, NULL, 0, (const char *) info1,
839 (const char *) info2, (const char *) info3, 0, 0, msg,
840 info1, info2, info3);
841 if (ctxt != NULL)
842 ctxt->nsWellFormed = 0;
843 }
844
845 /**
846 * xmlNsWarn
847 * @ctxt: an XML parser context
848 * @error: the error number
849 * @msg: the message
850 * @info1: extra information string
851 * @info2: extra information string
852 *
853 * Handle a namespace warning error
854 */
855 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)856 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
857 const char *msg,
858 const xmlChar * info1, const xmlChar * info2,
859 const xmlChar * info3)
860 {
861 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
862 (ctxt->instate == XML_PARSER_EOF))
863 return;
864 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
865 XML_ERR_WARNING, NULL, 0, (const char *) info1,
866 (const char *) info2, (const char *) info3, 0, 0, msg,
867 info1, info2, info3);
868 }
869
870 /************************************************************************
871 * *
872 * Library wide options *
873 * *
874 ************************************************************************/
875
876 /**
877 * xmlHasFeature:
878 * @feature: the feature to be examined
879 *
880 * Examines if the library has been compiled with a given feature.
881 *
882 * Returns a non-zero value if the feature exist, otherwise zero.
883 * Returns zero (0) if the feature does not exist or an unknown
884 * unknown feature is requested, non-zero otherwise.
885 */
886 int
xmlHasFeature(xmlFeature feature)887 xmlHasFeature(xmlFeature feature)
888 {
889 switch (feature) {
890 case XML_WITH_THREAD:
891 #ifdef LIBXML_THREAD_ENABLED
892 return(1);
893 #else
894 return(0);
895 #endif
896 case XML_WITH_TREE:
897 #ifdef LIBXML_TREE_ENABLED
898 return(1);
899 #else
900 return(0);
901 #endif
902 case XML_WITH_OUTPUT:
903 #ifdef LIBXML_OUTPUT_ENABLED
904 return(1);
905 #else
906 return(0);
907 #endif
908 case XML_WITH_PUSH:
909 #ifdef LIBXML_PUSH_ENABLED
910 return(1);
911 #else
912 return(0);
913 #endif
914 case XML_WITH_READER:
915 #ifdef LIBXML_READER_ENABLED
916 return(1);
917 #else
918 return(0);
919 #endif
920 case XML_WITH_PATTERN:
921 #ifdef LIBXML_PATTERN_ENABLED
922 return(1);
923 #else
924 return(0);
925 #endif
926 case XML_WITH_WRITER:
927 #ifdef LIBXML_WRITER_ENABLED
928 return(1);
929 #else
930 return(0);
931 #endif
932 case XML_WITH_SAX1:
933 #ifdef LIBXML_SAX1_ENABLED
934 return(1);
935 #else
936 return(0);
937 #endif
938 case XML_WITH_FTP:
939 #ifdef LIBXML_FTP_ENABLED
940 return(1);
941 #else
942 return(0);
943 #endif
944 case XML_WITH_HTTP:
945 #ifdef LIBXML_HTTP_ENABLED
946 return(1);
947 #else
948 return(0);
949 #endif
950 case XML_WITH_VALID:
951 #ifdef LIBXML_VALID_ENABLED
952 return(1);
953 #else
954 return(0);
955 #endif
956 case XML_WITH_HTML:
957 #ifdef LIBXML_HTML_ENABLED
958 return(1);
959 #else
960 return(0);
961 #endif
962 case XML_WITH_LEGACY:
963 #ifdef LIBXML_LEGACY_ENABLED
964 return(1);
965 #else
966 return(0);
967 #endif
968 case XML_WITH_C14N:
969 #ifdef LIBXML_C14N_ENABLED
970 return(1);
971 #else
972 return(0);
973 #endif
974 case XML_WITH_CATALOG:
975 #ifdef LIBXML_CATALOG_ENABLED
976 return(1);
977 #else
978 return(0);
979 #endif
980 case XML_WITH_XPATH:
981 #ifdef LIBXML_XPATH_ENABLED
982 return(1);
983 #else
984 return(0);
985 #endif
986 case XML_WITH_XPTR:
987 #ifdef LIBXML_XPTR_ENABLED
988 return(1);
989 #else
990 return(0);
991 #endif
992 case XML_WITH_XINCLUDE:
993 #ifdef LIBXML_XINCLUDE_ENABLED
994 return(1);
995 #else
996 return(0);
997 #endif
998 case XML_WITH_ICONV:
999 #ifdef LIBXML_ICONV_ENABLED
1000 return(1);
1001 #else
1002 return(0);
1003 #endif
1004 case XML_WITH_ISO8859X:
1005 #ifdef LIBXML_ISO8859X_ENABLED
1006 return(1);
1007 #else
1008 return(0);
1009 #endif
1010 case XML_WITH_UNICODE:
1011 #ifdef LIBXML_UNICODE_ENABLED
1012 return(1);
1013 #else
1014 return(0);
1015 #endif
1016 case XML_WITH_REGEXP:
1017 #ifdef LIBXML_REGEXP_ENABLED
1018 return(1);
1019 #else
1020 return(0);
1021 #endif
1022 case XML_WITH_AUTOMATA:
1023 #ifdef LIBXML_AUTOMATA_ENABLED
1024 return(1);
1025 #else
1026 return(0);
1027 #endif
1028 case XML_WITH_EXPR:
1029 #ifdef LIBXML_EXPR_ENABLED
1030 return(1);
1031 #else
1032 return(0);
1033 #endif
1034 case XML_WITH_SCHEMAS:
1035 #ifdef LIBXML_SCHEMAS_ENABLED
1036 return(1);
1037 #else
1038 return(0);
1039 #endif
1040 case XML_WITH_SCHEMATRON:
1041 #ifdef LIBXML_SCHEMATRON_ENABLED
1042 return(1);
1043 #else
1044 return(0);
1045 #endif
1046 case XML_WITH_MODULES:
1047 #ifdef LIBXML_MODULES_ENABLED
1048 return(1);
1049 #else
1050 return(0);
1051 #endif
1052 case XML_WITH_DEBUG:
1053 #ifdef LIBXML_DEBUG_ENABLED
1054 return(1);
1055 #else
1056 return(0);
1057 #endif
1058 case XML_WITH_DEBUG_MEM:
1059 #ifdef DEBUG_MEMORY_LOCATION
1060 return(1);
1061 #else
1062 return(0);
1063 #endif
1064 case XML_WITH_DEBUG_RUN:
1065 #ifdef LIBXML_DEBUG_RUNTIME
1066 return(1);
1067 #else
1068 return(0);
1069 #endif
1070 case XML_WITH_ZLIB:
1071 #ifdef LIBXML_ZLIB_ENABLED
1072 return(1);
1073 #else
1074 return(0);
1075 #endif
1076 case XML_WITH_LZMA:
1077 #ifdef LIBXML_LZMA_ENABLED
1078 return(1);
1079 #else
1080 return(0);
1081 #endif
1082 case XML_WITH_ICU:
1083 #ifdef LIBXML_ICU_ENABLED
1084 return(1);
1085 #else
1086 return(0);
1087 #endif
1088 default:
1089 break;
1090 }
1091 return(0);
1092 }
1093
1094 /************************************************************************
1095 * *
1096 * SAX2 defaulted attributes handling *
1097 * *
1098 ************************************************************************/
1099
1100 /**
1101 * xmlDetectSAX2:
1102 * @ctxt: an XML parser context
1103 *
1104 * Do the SAX2 detection and specific initialization
1105 */
1106 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1107 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1108 xmlSAXHandlerPtr sax;
1109 if (ctxt == NULL) return;
1110 sax = ctxt->sax;
1111 #ifdef LIBXML_SAX1_ENABLED
1112 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1113 ((sax->startElementNs != NULL) ||
1114 (sax->endElementNs != NULL) ||
1115 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1116 ctxt->sax2 = 1;
1117 #else
1118 ctxt->sax2 = 1;
1119 #endif /* LIBXML_SAX1_ENABLED */
1120
1121 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1122 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1123 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1124 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1125 (ctxt->str_xml_ns == NULL)) {
1126 xmlErrMemory(ctxt, NULL);
1127 }
1128 }
1129
1130 typedef struct _xmlDefAttrs xmlDefAttrs;
1131 typedef xmlDefAttrs *xmlDefAttrsPtr;
1132 struct _xmlDefAttrs {
1133 int nbAttrs; /* number of defaulted attributes on that element */
1134 int maxAttrs; /* the size of the array */
1135 #if __STDC_VERSION__ >= 199901L
1136 /* Using a C99 flexible array member avoids UBSan errors. */
1137 const xmlChar *values[]; /* array of localname/prefix/values/external */
1138 #else
1139 const xmlChar *values[5];
1140 #endif
1141 };
1142
1143 /**
1144 * xmlAttrNormalizeSpace:
1145 * @src: the source string
1146 * @dst: the target string
1147 *
1148 * Normalize the space in non CDATA attribute values:
1149 * If the attribute type is not CDATA, then the XML processor MUST further
1150 * process the normalized attribute value by discarding any leading and
1151 * trailing space (#x20) characters, and by replacing sequences of space
1152 * (#x20) characters by a single space (#x20) character.
1153 * Note that the size of dst need to be at least src, and if one doesn't need
1154 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1155 * passing src as dst is just fine.
1156 *
1157 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1158 * is needed.
1159 */
1160 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1161 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1162 {
1163 if ((src == NULL) || (dst == NULL))
1164 return(NULL);
1165
1166 while (*src == 0x20) src++;
1167 while (*src != 0) {
1168 if (*src == 0x20) {
1169 while (*src == 0x20) src++;
1170 if (*src != 0)
1171 *dst++ = 0x20;
1172 } else {
1173 *dst++ = *src++;
1174 }
1175 }
1176 *dst = 0;
1177 if (dst == src)
1178 return(NULL);
1179 return(dst);
1180 }
1181
1182 /**
1183 * xmlAttrNormalizeSpace2:
1184 * @src: the source string
1185 *
1186 * Normalize the space in non CDATA attribute values, a slightly more complex
1187 * front end to avoid allocation problems when running on attribute values
1188 * coming from the input.
1189 *
1190 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1191 * is needed.
1192 */
1193 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1194 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1195 {
1196 int i;
1197 int remove_head = 0;
1198 int need_realloc = 0;
1199 const xmlChar *cur;
1200
1201 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1202 return(NULL);
1203 i = *len;
1204 if (i <= 0)
1205 return(NULL);
1206
1207 cur = src;
1208 while (*cur == 0x20) {
1209 cur++;
1210 remove_head++;
1211 }
1212 while (*cur != 0) {
1213 if (*cur == 0x20) {
1214 cur++;
1215 if ((*cur == 0x20) || (*cur == 0)) {
1216 need_realloc = 1;
1217 break;
1218 }
1219 } else
1220 cur++;
1221 }
1222 if (need_realloc) {
1223 xmlChar *ret;
1224
1225 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1226 if (ret == NULL) {
1227 xmlErrMemory(ctxt, NULL);
1228 return(NULL);
1229 }
1230 xmlAttrNormalizeSpace(ret, ret);
1231 *len = (int) strlen((const char *)ret);
1232 return(ret);
1233 } else if (remove_head) {
1234 *len -= remove_head;
1235 memmove(src, src + remove_head, 1 + *len);
1236 return(src);
1237 }
1238 return(NULL);
1239 }
1240
1241 /**
1242 * xmlAddDefAttrs:
1243 * @ctxt: an XML parser context
1244 * @fullname: the element fullname
1245 * @fullattr: the attribute fullname
1246 * @value: the attribute value
1247 *
1248 * Add a defaulted attribute for an element
1249 */
1250 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1251 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1252 const xmlChar *fullname,
1253 const xmlChar *fullattr,
1254 const xmlChar *value) {
1255 xmlDefAttrsPtr defaults;
1256 int len;
1257 const xmlChar *name;
1258 const xmlChar *prefix;
1259
1260 /*
1261 * Allows to detect attribute redefinitions
1262 */
1263 if (ctxt->attsSpecial != NULL) {
1264 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1265 return;
1266 }
1267
1268 if (ctxt->attsDefault == NULL) {
1269 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1270 if (ctxt->attsDefault == NULL)
1271 goto mem_error;
1272 }
1273
1274 /*
1275 * split the element name into prefix:localname , the string found
1276 * are within the DTD and then not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullname, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullname, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1285 }
1286
1287 /*
1288 * make sure there is some storage
1289 */
1290 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1291 if (defaults == NULL) {
1292 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1293 (4 * 5) * sizeof(const xmlChar *));
1294 if (defaults == NULL)
1295 goto mem_error;
1296 defaults->nbAttrs = 0;
1297 defaults->maxAttrs = 4;
1298 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1299 defaults, NULL) < 0) {
1300 xmlFree(defaults);
1301 goto mem_error;
1302 }
1303 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1304 xmlDefAttrsPtr temp;
1305
1306 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1307 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1308 if (temp == NULL)
1309 goto mem_error;
1310 defaults = temp;
1311 defaults->maxAttrs *= 2;
1312 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1313 defaults, NULL) < 0) {
1314 xmlFree(defaults);
1315 goto mem_error;
1316 }
1317 }
1318
1319 /*
1320 * Split the element name into prefix:localname , the string found
1321 * are within the DTD and hen not associated to namespace names.
1322 */
1323 name = xmlSplitQName3(fullattr, &len);
1324 if (name == NULL) {
1325 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1326 prefix = NULL;
1327 } else {
1328 name = xmlDictLookup(ctxt->dict, name, -1);
1329 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1330 }
1331
1332 defaults->values[5 * defaults->nbAttrs] = name;
1333 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1334 /* intern the string and precompute the end */
1335 len = xmlStrlen(value);
1336 value = xmlDictLookup(ctxt->dict, value, len);
1337 defaults->values[5 * defaults->nbAttrs + 2] = value;
1338 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1339 if (ctxt->external)
1340 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1341 else
1342 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1343 defaults->nbAttrs++;
1344
1345 return;
1346
1347 mem_error:
1348 xmlErrMemory(ctxt, NULL);
1349 return;
1350 }
1351
1352 /**
1353 * xmlAddSpecialAttr:
1354 * @ctxt: an XML parser context
1355 * @fullname: the element fullname
1356 * @fullattr: the attribute fullname
1357 * @type: the attribute type
1358 *
1359 * Register this attribute type
1360 */
1361 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1362 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1363 const xmlChar *fullname,
1364 const xmlChar *fullattr,
1365 int type)
1366 {
1367 if (ctxt->attsSpecial == NULL) {
1368 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1369 if (ctxt->attsSpecial == NULL)
1370 goto mem_error;
1371 }
1372
1373 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1374 return;
1375
1376 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1377 (void *) (ptrdiff_t) type);
1378 return;
1379
1380 mem_error:
1381 xmlErrMemory(ctxt, NULL);
1382 return;
1383 }
1384
1385 /**
1386 * xmlCleanSpecialAttrCallback:
1387 *
1388 * Removes CDATA attributes from the special attribute table
1389 */
1390 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1391 xmlCleanSpecialAttrCallback(void *payload, void *data,
1392 const xmlChar *fullname, const xmlChar *fullattr,
1393 const xmlChar *unused ATTRIBUTE_UNUSED) {
1394 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1395
1396 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1397 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1398 }
1399 }
1400
1401 /**
1402 * xmlCleanSpecialAttr:
1403 * @ctxt: an XML parser context
1404 *
1405 * Trim the list of attributes defined to remove all those of type
1406 * CDATA as they are not special. This call should be done when finishing
1407 * to parse the DTD and before starting to parse the document root.
1408 */
1409 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1410 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1411 {
1412 if (ctxt->attsSpecial == NULL)
1413 return;
1414
1415 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1416
1417 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1418 xmlHashFree(ctxt->attsSpecial, NULL);
1419 ctxt->attsSpecial = NULL;
1420 }
1421 return;
1422 }
1423
1424 /**
1425 * xmlCheckLanguageID:
1426 * @lang: pointer to the string value
1427 *
1428 * Checks that the value conforms to the LanguageID production:
1429 *
1430 * NOTE: this is somewhat deprecated, those productions were removed from
1431 * the XML Second edition.
1432 *
1433 * [33] LanguageID ::= Langcode ('-' Subcode)*
1434 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1435 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1436 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1437 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1438 * [38] Subcode ::= ([a-z] | [A-Z])+
1439 *
1440 * The current REC reference the successors of RFC 1766, currently 5646
1441 *
1442 * http://www.rfc-editor.org/rfc/rfc5646.txt
1443 * langtag = language
1444 * ["-" script]
1445 * ["-" region]
1446 * *("-" variant)
1447 * *("-" extension)
1448 * ["-" privateuse]
1449 * language = 2*3ALPHA ; shortest ISO 639 code
1450 * ["-" extlang] ; sometimes followed by
1451 * ; extended language subtags
1452 * / 4ALPHA ; or reserved for future use
1453 * / 5*8ALPHA ; or registered language subtag
1454 *
1455 * extlang = 3ALPHA ; selected ISO 639 codes
1456 * *2("-" 3ALPHA) ; permanently reserved
1457 *
1458 * script = 4ALPHA ; ISO 15924 code
1459 *
1460 * region = 2ALPHA ; ISO 3166-1 code
1461 * / 3DIGIT ; UN M.49 code
1462 *
1463 * variant = 5*8alphanum ; registered variants
1464 * / (DIGIT 3alphanum)
1465 *
1466 * extension = singleton 1*("-" (2*8alphanum))
1467 *
1468 * ; Single alphanumerics
1469 * ; "x" reserved for private use
1470 * singleton = DIGIT ; 0 - 9
1471 * / %x41-57 ; A - W
1472 * / %x59-5A ; Y - Z
1473 * / %x61-77 ; a - w
1474 * / %x79-7A ; y - z
1475 *
1476 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1477 * The parser below doesn't try to cope with extension or privateuse
1478 * that could be added but that's not interoperable anyway
1479 *
1480 * Returns 1 if correct 0 otherwise
1481 **/
1482 int
xmlCheckLanguageID(const xmlChar * lang)1483 xmlCheckLanguageID(const xmlChar * lang)
1484 {
1485 const xmlChar *cur = lang, *nxt;
1486
1487 if (cur == NULL)
1488 return (0);
1489 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1490 ((cur[0] == 'I') && (cur[1] == '-')) ||
1491 ((cur[0] == 'x') && (cur[1] == '-')) ||
1492 ((cur[0] == 'X') && (cur[1] == '-'))) {
1493 /*
1494 * Still allow IANA code and user code which were coming
1495 * from the previous version of the XML-1.0 specification
1496 * it's deprecated but we should not fail
1497 */
1498 cur += 2;
1499 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1500 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1501 cur++;
1502 return(cur[0] == 0);
1503 }
1504 nxt = cur;
1505 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1506 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1507 nxt++;
1508 if (nxt - cur >= 4) {
1509 /*
1510 * Reserved
1511 */
1512 if ((nxt - cur > 8) || (nxt[0] != 0))
1513 return(0);
1514 return(1);
1515 }
1516 if (nxt - cur < 2)
1517 return(0);
1518 /* we got an ISO 639 code */
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have extlang or script or region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533 if (nxt - cur == 4)
1534 goto script;
1535 if (nxt - cur == 2)
1536 goto region;
1537 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1538 goto variant;
1539 if (nxt - cur != 3)
1540 return(0);
1541 /* we parsed an extlang */
1542 if (nxt[0] == 0)
1543 return(1);
1544 if (nxt[0] != '-')
1545 return(0);
1546
1547 nxt++;
1548 cur = nxt;
1549 /* now we can have script or region or variant */
1550 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1551 goto region_m49;
1552
1553 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1554 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1555 nxt++;
1556 if (nxt - cur == 2)
1557 goto region;
1558 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1559 goto variant;
1560 if (nxt - cur != 4)
1561 return(0);
1562 /* we parsed a script */
1563 script:
1564 if (nxt[0] == 0)
1565 return(1);
1566 if (nxt[0] != '-')
1567 return(0);
1568
1569 nxt++;
1570 cur = nxt;
1571 /* now we can have region or variant */
1572 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1573 goto region_m49;
1574
1575 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1576 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1577 nxt++;
1578
1579 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1580 goto variant;
1581 if (nxt - cur != 2)
1582 return(0);
1583 /* we parsed a region */
1584 region:
1585 if (nxt[0] == 0)
1586 return(1);
1587 if (nxt[0] != '-')
1588 return(0);
1589
1590 nxt++;
1591 cur = nxt;
1592 /* now we can just have a variant */
1593 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1594 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1595 nxt++;
1596
1597 if ((nxt - cur < 5) || (nxt - cur > 8))
1598 return(0);
1599
1600 /* we parsed a variant */
1601 variant:
1602 if (nxt[0] == 0)
1603 return(1);
1604 if (nxt[0] != '-')
1605 return(0);
1606 /* extensions and private use subtags not checked */
1607 return (1);
1608
1609 region_m49:
1610 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1611 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1612 nxt += 3;
1613 goto region;
1614 }
1615 return(0);
1616 }
1617
1618 /************************************************************************
1619 * *
1620 * Parser stacks related functions and macros *
1621 * *
1622 ************************************************************************/
1623
1624 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1625 const xmlChar ** str);
1626
1627 #ifdef SAX2
1628 /**
1629 * nsPush:
1630 * @ctxt: an XML parser context
1631 * @prefix: the namespace prefix or NULL
1632 * @URL: the namespace name
1633 *
1634 * Pushes a new parser namespace on top of the ns stack
1635 *
1636 * Returns -1 in case of error, -2 if the namespace should be discarded
1637 * and the index in the stack otherwise.
1638 */
1639 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1640 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1641 {
1642 if (ctxt->options & XML_PARSE_NSCLEAN) {
1643 int i;
1644 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1645 if (ctxt->nsTab[i] == prefix) {
1646 /* in scope */
1647 if (ctxt->nsTab[i + 1] == URL)
1648 return(-2);
1649 /* out of scope keep it */
1650 break;
1651 }
1652 }
1653 }
1654 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1655 ctxt->nsMax = 10;
1656 ctxt->nsNr = 0;
1657 ctxt->nsTab = (const xmlChar **)
1658 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1659 if (ctxt->nsTab == NULL) {
1660 xmlErrMemory(ctxt, NULL);
1661 ctxt->nsMax = 0;
1662 return (-1);
1663 }
1664 } else if (ctxt->nsNr >= ctxt->nsMax) {
1665 const xmlChar ** tmp;
1666 ctxt->nsMax *= 2;
1667 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1668 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1669 if (tmp == NULL) {
1670 xmlErrMemory(ctxt, NULL);
1671 ctxt->nsMax /= 2;
1672 return (-1);
1673 }
1674 ctxt->nsTab = tmp;
1675 }
1676 ctxt->nsTab[ctxt->nsNr++] = prefix;
1677 ctxt->nsTab[ctxt->nsNr++] = URL;
1678 return (ctxt->nsNr);
1679 }
1680 /**
1681 * nsPop:
1682 * @ctxt: an XML parser context
1683 * @nr: the number to pop
1684 *
1685 * Pops the top @nr parser prefix/namespace from the ns stack
1686 *
1687 * Returns the number of namespaces removed
1688 */
1689 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1690 nsPop(xmlParserCtxtPtr ctxt, int nr)
1691 {
1692 int i;
1693
1694 if (ctxt->nsTab == NULL) return(0);
1695 if (ctxt->nsNr < nr) {
1696 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1697 nr = ctxt->nsNr;
1698 }
1699 if (ctxt->nsNr <= 0)
1700 return (0);
1701
1702 for (i = 0;i < nr;i++) {
1703 ctxt->nsNr--;
1704 ctxt->nsTab[ctxt->nsNr] = NULL;
1705 }
1706 return(nr);
1707 }
1708 #endif
1709
1710 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1711 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1712 const xmlChar **atts;
1713 int *attallocs;
1714 int maxatts;
1715
1716 if (ctxt->atts == NULL) {
1717 maxatts = 55; /* allow for 10 attrs by default */
1718 atts = (const xmlChar **)
1719 xmlMalloc(maxatts * sizeof(xmlChar *));
1720 if (atts == NULL) goto mem_error;
1721 ctxt->atts = atts;
1722 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1723 if (attallocs == NULL) goto mem_error;
1724 ctxt->attallocs = attallocs;
1725 ctxt->maxatts = maxatts;
1726 } else if (nr + 5 > ctxt->maxatts) {
1727 maxatts = (nr + 5) * 2;
1728 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1729 maxatts * sizeof(const xmlChar *));
1730 if (atts == NULL) goto mem_error;
1731 ctxt->atts = atts;
1732 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1733 (maxatts / 5) * sizeof(int));
1734 if (attallocs == NULL) goto mem_error;
1735 ctxt->attallocs = attallocs;
1736 ctxt->maxatts = maxatts;
1737 }
1738 return(ctxt->maxatts);
1739 mem_error:
1740 xmlErrMemory(ctxt, NULL);
1741 return(-1);
1742 }
1743
1744 /**
1745 * inputPush:
1746 * @ctxt: an XML parser context
1747 * @value: the parser input
1748 *
1749 * Pushes a new parser input on top of the input stack
1750 *
1751 * Returns -1 in case of error, the index in the stack otherwise
1752 */
1753 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1754 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1755 {
1756 if ((ctxt == NULL) || (value == NULL))
1757 return(-1);
1758 if (ctxt->inputNr >= ctxt->inputMax) {
1759 ctxt->inputMax *= 2;
1760 ctxt->inputTab =
1761 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1762 ctxt->inputMax *
1763 sizeof(ctxt->inputTab[0]));
1764 if (ctxt->inputTab == NULL) {
1765 xmlErrMemory(ctxt, NULL);
1766 xmlFreeInputStream(value);
1767 ctxt->inputMax /= 2;
1768 value = NULL;
1769 return (-1);
1770 }
1771 }
1772 ctxt->inputTab[ctxt->inputNr] = value;
1773 ctxt->input = value;
1774 return (ctxt->inputNr++);
1775 }
1776 /**
1777 * inputPop:
1778 * @ctxt: an XML parser context
1779 *
1780 * Pops the top parser input from the input stack
1781 *
1782 * Returns the input just removed
1783 */
1784 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1785 inputPop(xmlParserCtxtPtr ctxt)
1786 {
1787 xmlParserInputPtr ret;
1788
1789 if (ctxt == NULL)
1790 return(NULL);
1791 if (ctxt->inputNr <= 0)
1792 return (NULL);
1793 ctxt->inputNr--;
1794 if (ctxt->inputNr > 0)
1795 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1796 else
1797 ctxt->input = NULL;
1798 ret = ctxt->inputTab[ctxt->inputNr];
1799 ctxt->inputTab[ctxt->inputNr] = NULL;
1800 return (ret);
1801 }
1802 /**
1803 * nodePush:
1804 * @ctxt: an XML parser context
1805 * @value: the element node
1806 *
1807 * Pushes a new element node on top of the node stack
1808 *
1809 * Returns -1 in case of error, the index in the stack otherwise
1810 */
1811 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1812 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1813 {
1814 if (ctxt == NULL) return(0);
1815 if (ctxt->nodeNr >= ctxt->nodeMax) {
1816 xmlNodePtr *tmp;
1817
1818 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1819 ctxt->nodeMax * 2 *
1820 sizeof(ctxt->nodeTab[0]));
1821 if (tmp == NULL) {
1822 xmlErrMemory(ctxt, NULL);
1823 return (-1);
1824 }
1825 ctxt->nodeTab = tmp;
1826 ctxt->nodeMax *= 2;
1827 }
1828 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1829 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1830 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1831 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1832 xmlParserMaxDepth);
1833 xmlHaltParser(ctxt);
1834 return(-1);
1835 }
1836 ctxt->nodeTab[ctxt->nodeNr] = value;
1837 ctxt->node = value;
1838 return (ctxt->nodeNr++);
1839 }
1840
1841 /**
1842 * nodePop:
1843 * @ctxt: an XML parser context
1844 *
1845 * Pops the top element node from the node stack
1846 *
1847 * Returns the node just removed
1848 */
1849 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1850 nodePop(xmlParserCtxtPtr ctxt)
1851 {
1852 xmlNodePtr ret;
1853
1854 if (ctxt == NULL) return(NULL);
1855 if (ctxt->nodeNr <= 0)
1856 return (NULL);
1857 ctxt->nodeNr--;
1858 if (ctxt->nodeNr > 0)
1859 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1860 else
1861 ctxt->node = NULL;
1862 ret = ctxt->nodeTab[ctxt->nodeNr];
1863 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1864 return (ret);
1865 }
1866
1867 /**
1868 * nameNsPush:
1869 * @ctxt: an XML parser context
1870 * @value: the element name
1871 * @prefix: the element prefix
1872 * @URI: the element namespace name
1873 * @line: the current line number for error messages
1874 * @nsNr: the number of namespaces pushed on the namespace table
1875 *
1876 * Pushes a new element name/prefix/URL on top of the name stack
1877 *
1878 * Returns -1 in case of error, the index in the stack otherwise
1879 */
1880 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)1881 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1882 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1883 {
1884 xmlStartTag *tag;
1885
1886 if (ctxt->nameNr >= ctxt->nameMax) {
1887 const xmlChar * *tmp;
1888 xmlStartTag *tmp2;
1889 ctxt->nameMax *= 2;
1890 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1891 ctxt->nameMax *
1892 sizeof(ctxt->nameTab[0]));
1893 if (tmp == NULL) {
1894 ctxt->nameMax /= 2;
1895 goto mem_error;
1896 }
1897 ctxt->nameTab = tmp;
1898 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1899 ctxt->nameMax *
1900 sizeof(ctxt->pushTab[0]));
1901 if (tmp2 == NULL) {
1902 ctxt->nameMax /= 2;
1903 goto mem_error;
1904 }
1905 ctxt->pushTab = tmp2;
1906 } else if (ctxt->pushTab == NULL) {
1907 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1908 sizeof(ctxt->pushTab[0]));
1909 if (ctxt->pushTab == NULL)
1910 goto mem_error;
1911 }
1912 ctxt->nameTab[ctxt->nameNr] = value;
1913 ctxt->name = value;
1914 tag = &ctxt->pushTab[ctxt->nameNr];
1915 tag->prefix = prefix;
1916 tag->URI = URI;
1917 tag->line = line;
1918 tag->nsNr = nsNr;
1919 return (ctxt->nameNr++);
1920 mem_error:
1921 xmlErrMemory(ctxt, NULL);
1922 return (-1);
1923 }
1924 #ifdef LIBXML_PUSH_ENABLED
1925 /**
1926 * nameNsPop:
1927 * @ctxt: an XML parser context
1928 *
1929 * Pops the top element/prefix/URI name from the name stack
1930 *
1931 * Returns the name just removed
1932 */
1933 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1934 nameNsPop(xmlParserCtxtPtr ctxt)
1935 {
1936 const xmlChar *ret;
1937
1938 if (ctxt->nameNr <= 0)
1939 return (NULL);
1940 ctxt->nameNr--;
1941 if (ctxt->nameNr > 0)
1942 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1943 else
1944 ctxt->name = NULL;
1945 ret = ctxt->nameTab[ctxt->nameNr];
1946 ctxt->nameTab[ctxt->nameNr] = NULL;
1947 return (ret);
1948 }
1949 #endif /* LIBXML_PUSH_ENABLED */
1950
1951 /**
1952 * namePush:
1953 * @ctxt: an XML parser context
1954 * @value: the element name
1955 *
1956 * Pushes a new element name on top of the name stack
1957 *
1958 * Returns -1 in case of error, the index in the stack otherwise
1959 */
1960 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1961 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1962 {
1963 if (ctxt == NULL) return (-1);
1964
1965 if (ctxt->nameNr >= ctxt->nameMax) {
1966 const xmlChar * *tmp;
1967 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1968 ctxt->nameMax * 2 *
1969 sizeof(ctxt->nameTab[0]));
1970 if (tmp == NULL) {
1971 goto mem_error;
1972 }
1973 ctxt->nameTab = tmp;
1974 ctxt->nameMax *= 2;
1975 }
1976 ctxt->nameTab[ctxt->nameNr] = value;
1977 ctxt->name = value;
1978 return (ctxt->nameNr++);
1979 mem_error:
1980 xmlErrMemory(ctxt, NULL);
1981 return (-1);
1982 }
1983 /**
1984 * namePop:
1985 * @ctxt: an XML parser context
1986 *
1987 * Pops the top element name from the name stack
1988 *
1989 * Returns the name just removed
1990 */
1991 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1992 namePop(xmlParserCtxtPtr ctxt)
1993 {
1994 const xmlChar *ret;
1995
1996 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1997 return (NULL);
1998 ctxt->nameNr--;
1999 if (ctxt->nameNr > 0)
2000 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2001 else
2002 ctxt->name = NULL;
2003 ret = ctxt->nameTab[ctxt->nameNr];
2004 ctxt->nameTab[ctxt->nameNr] = NULL;
2005 return (ret);
2006 }
2007
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack, doubling the stack when full,
 * and makes ctxt->space point to the new top entry.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

	ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
	                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
	    xmlErrMemory(ctxt, NULL);
	    ctxt->spaceMax /=2;
	    return(-1);
	}
	ctxt->spaceTab = grown;
    }

    top = ctxt->spaceNr;
    ctxt->spaceTab[top] = val;
    ctxt->space = &ctxt->spaceTab[top];
    ctxt->spaceNr = top + 1;
    return(top);
}
2026
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first entry when the stack becomes empty, and
 * the vacated slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2039
2040 /*
2041 * Macros for accessing the content. Those should be used only by the parser,
2042 * and not exported.
2043 *
2044 * Dirty macros, i.e. one often need to make assumption on the context to
2045 * use them
2046 *
2047 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2048 * To be used with extreme caution since operations consuming
2049 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2054 * RAW same as CUR but in the input buffer, bypass any token
2055 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
2058 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2059 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2062 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2063 *
2064 * NEXT Skip to the next character, this does the proper decoding
2065 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2066 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2067 * CUR_CHAR(l) returns the current unicode character (int), set l
2068 * to the number of xmlChars used for the encoding [0-5].
2069 * CUR_SCHAR same but operate on a string instead of the context
2070 * COPY_BUF copy the current unicode char to the target buffer, increment
2071 * the index
2072 * GROW, SHRINK handling of input buffers
2073 */
2074
/* Byte under the input cursor; RAW and CUR expand to the same thing. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* Byte at offset val from the input cursor. */
#define NXT(val) ctxt->input->cur[(val)]
/* The input cursor itself and the start of the current input. */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2080
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn. The bytes are compared as unsigned char, left to right, and
 * the comparison stops at the first mismatch. Arguments are parenthesized
 * so that any expression can be passed, and the cast keeps the const
 * qualifier of the inspected buffer.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2098
/*
 * Advance the input cursor and the column counter by val bytes, then
 * ask for more input if the cursor landed on a zero byte.
 * val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (ctxt->input->cur[0] == 0)					\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2104
/*
 * Advance the input cursor by val bytes one at a time, keeping the
 * line and column counters up to date across newlines, then ask for
 * more input if the cursor landed on a zero byte.
 * val is parenthesized so that any expression can be passed; it is
 * evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2116
2117 #define SHRINK if ((ctxt->progressive == 0) && \
2118 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2119 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2120 xmlSHRINK (ctxt);
2121
xmlSHRINK(xmlParserCtxtPtr ctxt)2122 static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2123 xmlParserInputShrink(ctxt->input);
2124 if (*ctxt->input->cur == 0)
2125 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2126 }
2127
2128 #define GROW if ((ctxt->progressive == 0) && \
2129 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2130 xmlGROW (ctxt);
2131
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Worker behind the GROW macro.
 *
 * Unless XML_PARSE_HUGE is set, refuses to go on (fatal error, parser
 * halted) when the distance from the cursor to either end of the buffer
 * exceeds XML_MAX_LOOKUP_LIMIT and the input has a buffer whose read
 * callback is not xmlInputReadCallbackNop. Otherwise grows the input,
 * checks that the cursor still lies within [base, end], and grows once
 * more if the cursor sits on a zero byte.
 */
static void
xmlGROW(xmlParserCtxtPtr ctxt) {
    const ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    const ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;

    if ((ahead > XML_MAX_LOOKUP_LIMIT) || (behind > XML_MAX_LOOKUP_LIMIT)) {
        if ((ctxt->input->buf != NULL) &&
            (ctxt->input->buf->readcallback != xmlInputReadCallbackNop) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
            xmlHaltParser(ctxt);
            return;
        }
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /* the parser is halted before the error is raised, on purpose */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if (ctxt->input->cur == NULL)
        return;
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2155
/* SKIP_BLANKS: skip blanks at the cursor, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, counting one column, and request
 * more input when the cursor lands on a zero byte. The line counter is
 * not touched. Expands to a brace block.
 */
#define NEXT1 {								\
    ctxt->input->col++;							\
    ctxt->input->cur++;							\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  }

/*
 * NEXTL(l): advance by one character that occupies @l bytes. A '\n' at
 * the cursor bumps the line counter and resets the column to 1, anything
 * else counts as one column. @l is parenthesized so that an expression
 * argument is added to the cursor as a whole.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)
2173
/*
 * CUR_CHAR(l):     current character of the parser input via
 *                  xmlCurrentChar(); @l receives the value it stores
 *                  through the pointer argument.
 * CUR_SCHAR(s, l): same for the string @s via xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append value @v to buffer @b at index @i and
 * advance @i. When @l is 1 the value is stored as a single xmlChar,
 * otherwise it is written with xmlCopyCharMultiByte() and @i advances by
 * that call's return value.
 * Expands to an unterminated if/else: the caller supplies the ';'.
 * All parameters are parenthesized in the expansion.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2180
2181 /**
2182 * xmlSkipBlankChars:
2183 * @ctxt: the XML parser context
2184 *
2185 * skip all blanks character found at that point in the input streams.
2186 * It pops up finished entities in the process if allowable at that point.
2187 *
2188 * Returns the number of space chars skipped
2189 */
2190
2191 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2192 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2193 int res = 0;
2194
2195 /*
2196 * It's Okay to use CUR/NEXT here since all the blanks are on
2197 * the ASCII range.
2198 */
2199 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2200 (ctxt->instate == XML_PARSER_START)) {
2201 const xmlChar *cur;
2202 /*
2203 * if we are in the document content, go really fast
2204 */
2205 cur = ctxt->input->cur;
2206 while (IS_BLANK_CH(*cur)) {
2207 if (*cur == '\n') {
2208 ctxt->input->line++; ctxt->input->col = 1;
2209 } else {
2210 ctxt->input->col++;
2211 }
2212 cur++;
2213 res++;
2214 if (*cur == 0) {
2215 ctxt->input->cur = cur;
2216 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2217 cur = ctxt->input->cur;
2218 }
2219 }
2220 ctxt->input->cur = cur;
2221 } else {
2222 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2223
2224 while (1) {
2225 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2226 NEXT;
2227 } else if (CUR == '%') {
2228 /*
2229 * Need to handle support of entities branching here
2230 */
2231 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2232 break;
2233 xmlParsePEReference(ctxt);
2234 } else if (CUR == 0) {
2235 if (ctxt->inputNr <= 1)
2236 break;
2237 xmlPopInput(ctxt);
2238 } else {
2239 break;
2240 }
2241
2242 /*
2243 * Also increase the counter when entering or exiting a PERef.
2244 * The spec says: "When a parameter-entity reference is recognized
2245 * in the DTD and included, its replacement text MUST be enlarged
2246 * by the attachment of one leading and one following space (#x20)
2247 * character."
2248 */
2249 res++;
2250 }
2251 }
2252 return(res);
2253 }
2254
2255 /************************************************************************
2256 * *
2257 * Commodity functions to handle entities *
2258 * *
2259 ************************************************************************/
2260
2261 /**
2262 * xmlPopInput:
2263 * @ctxt: an XML parser context
2264 *
2265 * xmlPopInput: the current input pointed by ctxt->input came to an end
2266 * pop it and return the next char.
2267 *
2268 * Returns the current xmlChar in the parser context
2269 */
2270 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2271 xmlPopInput(xmlParserCtxtPtr ctxt) {
2272 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2273 if (xmlParserDebugEntities)
2274 xmlGenericError(xmlGenericErrorContext,
2275 "Popping input %d\n", ctxt->inputNr);
2276 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2277 (ctxt->instate != XML_PARSER_EOF))
2278 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2279 "Unfinished entity outside the DTD");
2280 xmlFreeInputStream(inputPop(ctxt));
2281 if (*ctxt->input->cur == 0)
2282 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2283 return(CUR);
2284 }
2285
2286 /**
2287 * xmlPushInput:
2288 * @ctxt: an XML parser context
2289 * @input: an XML parser input fragment (entity, XML fragment ...).
2290 *
2291 * xmlPushInput: switch to a new input stream which is stacked on top
2292 * of the previous one(s).
2293 * Returns -1 in case of error or the index in the input stack
2294 */
2295 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2296 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2297 int ret;
2298 if (input == NULL) return(-1);
2299
2300 if (xmlParserDebugEntities) {
2301 if ((ctxt->input != NULL) && (ctxt->input->filename))
2302 xmlGenericError(xmlGenericErrorContext,
2303 "%s(%d): ", ctxt->input->filename,
2304 ctxt->input->line);
2305 xmlGenericError(xmlGenericErrorContext,
2306 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2307 }
2308 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2309 (ctxt->inputNr > 1024)) {
2310 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2311 while (ctxt->inputNr > 1)
2312 xmlFreeInputStream(inputPop(ctxt));
2313 return(-1);
2314 }
2315 ret = inputPush(ctxt, input);
2316 if (ctxt->instate == XML_PARSER_EOF)
2317 return(-1);
2318 GROW;
2319 return(ret);
2320 }
2321
2322 /**
2323 * xmlParseCharRef:
2324 * @ctxt: an XML parser context
2325 *
2326 * parse Reference declarations
2327 *
2328 * [66] CharRef ::= '&#' [0-9]+ ';' |
2329 * '&#x' [0-9a-fA-F]+ ';'
2330 *
2331 * [ WFC: Legal Character ]
2332 * Characters referred to using character references must match the
2333 * production for Char.
2334 *
2335 * Returns the value parsed (as an int), 0 in case of error
2336 */
2337 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2338 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2339 int val = 0;
2340 int count = 0;
2341
2342 /*
2343 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2344 */
2345 if ((RAW == '&') && (NXT(1) == '#') &&
2346 (NXT(2) == 'x')) {
2347 SKIP(3);
2348 GROW;
2349 while (RAW != ';') { /* loop blocked by count */
2350 if (count++ > 20) {
2351 count = 0;
2352 GROW;
2353 if (ctxt->instate == XML_PARSER_EOF)
2354 return(0);
2355 }
2356 if ((RAW >= '0') && (RAW <= '9'))
2357 val = val * 16 + (CUR - '0');
2358 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2359 val = val * 16 + (CUR - 'a') + 10;
2360 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2361 val = val * 16 + (CUR - 'A') + 10;
2362 else {
2363 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2364 val = 0;
2365 break;
2366 }
2367 if (val > 0x110000)
2368 val = 0x110000;
2369
2370 NEXT;
2371 count++;
2372 }
2373 if (RAW == ';') {
2374 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2375 ctxt->input->col++;
2376 ctxt->input->cur++;
2377 }
2378 } else if ((RAW == '&') && (NXT(1) == '#')) {
2379 SKIP(2);
2380 GROW;
2381 while (RAW != ';') { /* loop blocked by count */
2382 if (count++ > 20) {
2383 count = 0;
2384 GROW;
2385 if (ctxt->instate == XML_PARSER_EOF)
2386 return(0);
2387 }
2388 if ((RAW >= '0') && (RAW <= '9'))
2389 val = val * 10 + (CUR - '0');
2390 else {
2391 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2392 val = 0;
2393 break;
2394 }
2395 if (val > 0x110000)
2396 val = 0x110000;
2397
2398 NEXT;
2399 count++;
2400 }
2401 if (RAW == ';') {
2402 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2403 ctxt->input->col++;
2404 ctxt->input->cur++;
2405 }
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 }
2409
2410 /*
2411 * [ WFC: Legal Character ]
2412 * Characters referred to using character references must match the
2413 * production for Char.
2414 */
2415 if (val >= 0x110000) {
2416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417 "xmlParseCharRef: character reference out of bounds\n",
2418 val);
2419 } else if (IS_CHAR(val)) {
2420 return(val);
2421 } else {
2422 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2423 "xmlParseCharRef: invalid xmlChar value %d\n",
2424 val);
2425 }
2426 return(0);
2427 }
2428
2429 /**
2430 * xmlParseStringCharRef:
2431 * @ctxt: an XML parser context
2432 * @str: a pointer to an index in the string
2433 *
2434 * parse Reference declarations, variant parsing from a string rather
2435 * than an an input flow.
2436 *
2437 * [66] CharRef ::= '&#' [0-9]+ ';' |
2438 * '&#x' [0-9a-fA-F]+ ';'
2439 *
2440 * [ WFC: Legal Character ]
2441 * Characters referred to using character references must match the
2442 * production for Char.
2443 *
2444 * Returns the value parsed (as an int), 0 in case of error, str will be
2445 * updated to the current value of the index
2446 */
2447 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2448 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2449 const xmlChar *ptr;
2450 xmlChar cur;
2451 int val = 0;
2452
2453 if ((str == NULL) || (*str == NULL)) return(0);
2454 ptr = *str;
2455 cur = *ptr;
2456 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2457 ptr += 3;
2458 cur = *ptr;
2459 while (cur != ';') { /* Non input consuming loop */
2460 if ((cur >= '0') && (cur <= '9'))
2461 val = val * 16 + (cur - '0');
2462 else if ((cur >= 'a') && (cur <= 'f'))
2463 val = val * 16 + (cur - 'a') + 10;
2464 else if ((cur >= 'A') && (cur <= 'F'))
2465 val = val * 16 + (cur - 'A') + 10;
2466 else {
2467 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2468 val = 0;
2469 break;
2470 }
2471 if (val > 0x110000)
2472 val = 0x110000;
2473
2474 ptr++;
2475 cur = *ptr;
2476 }
2477 if (cur == ';')
2478 ptr++;
2479 } else if ((cur == '&') && (ptr[1] == '#')){
2480 ptr += 2;
2481 cur = *ptr;
2482 while (cur != ';') { /* Non input consuming loops */
2483 if ((cur >= '0') && (cur <= '9'))
2484 val = val * 10 + (cur - '0');
2485 else {
2486 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2487 val = 0;
2488 break;
2489 }
2490 if (val > 0x110000)
2491 val = 0x110000;
2492
2493 ptr++;
2494 cur = *ptr;
2495 }
2496 if (cur == ';')
2497 ptr++;
2498 } else {
2499 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2500 return(0);
2501 }
2502 *str = ptr;
2503
2504 /*
2505 * [ WFC: Legal Character ]
2506 * Characters referred to using character references must match the
2507 * production for Char.
2508 */
2509 if (val >= 0x110000) {
2510 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511 "xmlParseStringCharRef: character reference out of bounds\n",
2512 val);
2513 } else if (IS_CHAR(val)) {
2514 return(val);
2515 } else {
2516 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2517 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2518 val);
2519 }
2520 return(0);
2521 }
2522
2523 /**
2524 * xmlParserHandlePEReference:
2525 * @ctxt: the parser context
2526 *
2527 * [69] PEReference ::= '%' Name ';'
2528 *
2529 * [ WFC: No Recursion ]
2530 * A parsed entity must not contain a recursive
2531 * reference to itself, either directly or indirectly.
2532 *
2533 * [ WFC: Entity Declared ]
2534 * In a document without any DTD, a document with only an internal DTD
2535 * subset which contains no parameter entity references, or a document
2536 * with "standalone='yes'", ... ... The declaration of a parameter
2537 * entity must precede any reference to it...
2538 *
2539 * [ VC: Entity Declared ]
2540 * In a document with an external subset or external parameter entities
2541 * with "standalone='no'", ... ... The declaration of a parameter entity
2542 * must precede any reference to it...
2543 *
2544 * [ WFC: In DTD ]
2545 * Parameter-entity references may only appear in the DTD.
2546 * NOTE: misleading but this is handled.
2547 *
2548 * A PEReference may have been detected in the current input stream
2549 * the handling is done accordingly to
2550 * http://www.w3.org/TR/REC-xml#entproc
2551 * i.e.
2552 * - Included in literal in entity values
2553 * - Included as Parameter Entity reference within DTDs
2554 */
2555 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2556 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2557 switch(ctxt->instate) {
2558 case XML_PARSER_CDATA_SECTION:
2559 return;
2560 case XML_PARSER_COMMENT:
2561 return;
2562 case XML_PARSER_START_TAG:
2563 return;
2564 case XML_PARSER_END_TAG:
2565 return;
2566 case XML_PARSER_EOF:
2567 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2568 return;
2569 case XML_PARSER_PROLOG:
2570 case XML_PARSER_START:
2571 case XML_PARSER_MISC:
2572 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2573 return;
2574 case XML_PARSER_ENTITY_DECL:
2575 case XML_PARSER_CONTENT:
2576 case XML_PARSER_ATTRIBUTE_VALUE:
2577 case XML_PARSER_PI:
2578 case XML_PARSER_SYSTEM_LITERAL:
2579 case XML_PARSER_PUBLIC_LITERAL:
2580 /* we just ignore it there */
2581 return;
2582 case XML_PARSER_EPILOG:
2583 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2584 return;
2585 case XML_PARSER_ENTITY_VALUE:
2586 /*
2587 * NOTE: in the case of entity values, we don't do the
2588 * substitution here since we need the literal
2589 * entity value to be able to save the internal
2590 * subset of the document.
2591 * This will be handled by xmlStringDecodeEntities
2592 */
2593 return;
2594 case XML_PARSER_DTD:
2595 /*
2596 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2597 * In the internal DTD subset, parameter-entity references
2598 * can occur only where markup declarations can occur, not
2599 * within markup declarations.
2600 * In that case this is handled in xmlParseMarkupDecl
2601 */
2602 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2603 return;
2604 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2605 return;
2606 break;
2607 case XML_PARSER_IGNORE:
2608 return;
2609 }
2610
2611 xmlParsePEReference(ctxt);
2612 }
2613
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the size computation wraps around or when xmlRealloc
 * fails, in which case @buffer and buffer##_size are left unchanged.
 * @n is parenthesized so that expression arguments are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2628
2629 /**
2630 * xmlStringLenDecodeEntities:
2631 * @ctxt: the parser context
2632 * @str: the input string
2633 * @len: the string length
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2649 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2650 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2651 xmlChar *buffer = NULL;
2652 size_t buffer_size = 0;
2653 size_t nbchars = 0;
2654
2655 xmlChar *current = NULL;
2656 xmlChar *rep = NULL;
2657 const xmlChar *last;
2658 xmlEntityPtr ent;
2659 int c,l;
2660
2661 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2662 return(NULL);
2663 last = str + len;
2664
2665 if (((ctxt->depth > 40) &&
2666 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2667 (ctxt->depth > 1024)) {
2668 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2669 return(NULL);
2670 }
2671
2672 /*
2673 * allocate a translation buffer.
2674 */
2675 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2676 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2677 if (buffer == NULL) goto mem_error;
2678
2679 /*
2680 * OK loop until we reach one of the ending char or a size limit.
2681 * we are operating on already parsed values.
2682 */
2683 if (str < last)
2684 c = CUR_SCHAR(str, l);
2685 else
2686 c = 0;
2687 while ((c != 0) && (c != end) && /* non input consuming loop */
2688 (c != end2) && (c != end3) &&
2689 (ctxt->instate != XML_PARSER_EOF)) {
2690
2691 if (c == 0) break;
2692 if ((c == '&') && (str[1] == '#')) {
2693 int val = xmlParseStringCharRef(ctxt, &str);
2694 if (val == 0)
2695 goto int_error;
2696 COPY_BUF(0,buffer,nbchars,val);
2697 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2698 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2699 }
2700 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2701 if (xmlParserDebugEntities)
2702 xmlGenericError(xmlGenericErrorContext,
2703 "String decoding Entity Reference: %.30s\n",
2704 str);
2705 ent = xmlParseStringEntityRef(ctxt, &str);
2706 xmlParserEntityCheck(ctxt, 0, ent, 0);
2707 if (ent != NULL)
2708 ctxt->nbentities += ent->checked / 2;
2709 if ((ent != NULL) &&
2710 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2711 if (ent->content != NULL) {
2712 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2713 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2715 }
2716 } else {
2717 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2718 "predefined entity has no content\n");
2719 goto int_error;
2720 }
2721 } else if ((ent != NULL) && (ent->content != NULL)) {
2722 ctxt->depth++;
2723 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2724 0, 0, 0);
2725 ctxt->depth--;
2726 if (rep == NULL) {
2727 ent->content[0] = 0;
2728 goto int_error;
2729 }
2730
2731 current = rep;
2732 while (*current != 0) { /* non input consuming loop */
2733 buffer[nbchars++] = *current++;
2734 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2735 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2736 goto int_error;
2737 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2738 }
2739 }
2740 xmlFree(rep);
2741 rep = NULL;
2742 } else if (ent != NULL) {
2743 int i = xmlStrlen(ent->name);
2744 const xmlChar *cur = ent->name;
2745
2746 buffer[nbchars++] = '&';
2747 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2748 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2749 }
2750 for (;i > 0;i--)
2751 buffer[nbchars++] = *cur++;
2752 buffer[nbchars++] = ';';
2753 }
2754 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2755 if (xmlParserDebugEntities)
2756 xmlGenericError(xmlGenericErrorContext,
2757 "String decoding PE Reference: %.30s\n", str);
2758 ent = xmlParseStringPEReference(ctxt, &str);
2759 xmlParserEntityCheck(ctxt, 0, ent, 0);
2760 if (ent != NULL)
2761 ctxt->nbentities += ent->checked / 2;
2762 if (ent != NULL) {
2763 if (ent->content == NULL) {
2764 /*
2765 * Note: external parsed entities will not be loaded,
2766 * it is not required for a non-validating parser to
2767 * complete external PEReferences coming from the
2768 * internal subset
2769 */
2770 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2771 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2772 (ctxt->validate != 0)) {
2773 xmlLoadEntityContent(ctxt, ent);
2774 } else {
2775 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2776 "not validating will not read content for PE entity %s\n",
2777 ent->name, NULL);
2778 }
2779 }
2780 ctxt->depth++;
2781 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2782 0, 0, 0);
2783 ctxt->depth--;
2784 if (rep == NULL) {
2785 if (ent->content != NULL)
2786 ent->content[0] = 0;
2787 goto int_error;
2788 }
2789 current = rep;
2790 while (*current != 0) { /* non input consuming loop */
2791 buffer[nbchars++] = *current++;
2792 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2793 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2794 goto int_error;
2795 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2796 }
2797 }
2798 xmlFree(rep);
2799 rep = NULL;
2800 }
2801 } else {
2802 COPY_BUF(l,buffer,nbchars,c);
2803 str += l;
2804 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2805 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2806 }
2807 }
2808 if (str < last)
2809 c = CUR_SCHAR(str, l);
2810 else
2811 c = 0;
2812 }
2813 buffer[nbchars] = 0;
2814 return(buffer);
2815
2816 mem_error:
2817 xmlErrMemory(ctxt, NULL);
2818 int_error:
2819 if (rep != NULL)
2820 xmlFree(rep);
2821 if (buffer != NULL)
2822 xmlFree(buffer);
2823 return(NULL);
2824 }
2825
2826 /**
2827 * xmlStringDecodeEntities:
2828 * @ctxt: the parser context
2829 * @str: the input string
2830 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2831 * @end: an end marker xmlChar, 0 if none
2832 * @end2: an end marker xmlChar, 0 if none
2833 * @end3: an end marker xmlChar, 0 if none
2834 *
2835 * Takes a entity string content and process to do the adequate substitutions.
2836 *
2837 * [67] Reference ::= EntityRef | CharRef
2838 *
2839 * [69] PEReference ::= '%' Name ';'
2840 *
2841 * Returns A newly allocated string with the substitution done. The caller
2842 * must deallocate it !
2843 */
2844 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2845 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2846 xmlChar end, xmlChar end2, xmlChar end3) {
2847 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2848 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2849 end, end2, end3));
2850 }
2851
2852 /************************************************************************
2853 * *
2854 * Commodity functions, cleanup needed ? *
2855 * *
2856 ************************************************************************/
2857
2858 /**
2859 * areBlanks:
2860 * @ctxt: an XML parser context
2861 * @str: a xmlChar *
2862 * @len: the size of @str
2863 * @blank_chars: we know the chars are blanks
2864 *
2865 * Is this a sequence of blank chars that one can ignore ?
2866 *
2867 * Returns 1 if ignorable 0 otherwise.
2868 */
2869
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2870 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871 int blank_chars) {
2872 int i, ret;
2873 xmlNodePtr lastChild;
2874
2875 /*
2876 * Don't spend time trying to differentiate them, the same callback is
2877 * used !
2878 */
2879 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880 return(0);
2881
2882 /*
2883 * Check for xml:space value.
2884 */
2885 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886 (*(ctxt->space) == -2))
2887 return(0);
2888
2889 /*
2890 * Check that the string is made of blanks
2891 */
2892 if (blank_chars == 0) {
2893 for (i = 0;i < len;i++)
2894 if (!(IS_BLANK_CH(str[i]))) return(0);
2895 }
2896
2897 /*
2898 * Look if the element is mixed content in the DTD if available
2899 */
2900 if (ctxt->node == NULL) return(0);
2901 if (ctxt->myDoc != NULL) {
2902 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2903 if (ret == 0) return(1);
2904 if (ret == 1) return(0);
2905 }
2906
2907 /*
2908 * Otherwise, heuristic :-\
2909 */
2910 if ((RAW != '<') && (RAW != 0xD)) return(0);
2911 if ((ctxt->node->children == NULL) &&
2912 (RAW == '<') && (NXT(1) == '/')) return(0);
2913
2914 lastChild = xmlGetLastChild(ctxt->node);
2915 if (lastChild == NULL) {
2916 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2917 (ctxt->node->content != NULL)) return(0);
2918 } else if (xmlNodeIsText(lastChild))
2919 return(0);
2920 else if ((ctxt->node->children != NULL) &&
2921 (xmlNodeIsText(ctxt->node->children)))
2922 return(0);
2923 return(1);
2924 }
2925
2926 /************************************************************************
2927 * *
2928 * Extra stuff for namespace support *
2929 * Relates to http://www.w3.org/TR/WD-xml-names *
2930 * *
2931 ************************************************************************/
2932
2933 /**
2934 * xmlSplitQName:
2935 * @ctxt: an XML parser context
2936 * @name: an XML parser context
2937 * @prefix: a xmlChar **
2938 *
2939 * parse an UTF8 encoded XML qualified name string
2940 *
2941 * [NS 5] QName ::= (Prefix ':')? LocalPart
2942 *
2943 * [NS 6] Prefix ::= NCName
2944 *
2945 * [NS 7] LocalPart ::= NCName
2946 *
2947 * Returns the local part, and prefix is updated
2948 * to get the Prefix if any.
2949 */
2950
2951 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2952 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2953 xmlChar buf[XML_MAX_NAMELEN + 5];
2954 xmlChar *buffer = NULL;
2955 int len = 0;
2956 int max = XML_MAX_NAMELEN;
2957 xmlChar *ret = NULL;
2958 const xmlChar *cur = name;
2959 int c;
2960
2961 if (prefix == NULL) return(NULL);
2962 *prefix = NULL;
2963
2964 if (cur == NULL) return(NULL);
2965
2966 #ifndef XML_XML_NAMESPACE
2967 /* xml: prefix is not really a namespace */
2968 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2969 (cur[2] == 'l') && (cur[3] == ':'))
2970 return(xmlStrdup(name));
2971 #endif
2972
2973 /* nasty but well=formed */
2974 if (cur[0] == ':')
2975 return(xmlStrdup(name));
2976
2977 c = *cur++;
2978 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2979 buf[len++] = c;
2980 c = *cur++;
2981 }
2982 if (len >= max) {
2983 /*
2984 * Okay someone managed to make a huge name, so he's ready to pay
2985 * for the processing speed.
2986 */
2987 max = len * 2;
2988
2989 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2990 if (buffer == NULL) {
2991 xmlErrMemory(ctxt, NULL);
2992 return(NULL);
2993 }
2994 memcpy(buffer, buf, len);
2995 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2996 if (len + 10 > max) {
2997 xmlChar *tmp;
2998
2999 max *= 2;
3000 tmp = (xmlChar *) xmlRealloc(buffer,
3001 max * sizeof(xmlChar));
3002 if (tmp == NULL) {
3003 xmlFree(buffer);
3004 xmlErrMemory(ctxt, NULL);
3005 return(NULL);
3006 }
3007 buffer = tmp;
3008 }
3009 buffer[len++] = c;
3010 c = *cur++;
3011 }
3012 buffer[len] = 0;
3013 }
3014
3015 if ((c == ':') && (*cur == 0)) {
3016 if (buffer != NULL)
3017 xmlFree(buffer);
3018 *prefix = NULL;
3019 return(xmlStrdup(name));
3020 }
3021
3022 if (buffer == NULL)
3023 ret = xmlStrndup(buf, len);
3024 else {
3025 ret = buffer;
3026 buffer = NULL;
3027 max = XML_MAX_NAMELEN;
3028 }
3029
3030
3031 if (c == ':') {
3032 c = *cur;
3033 *prefix = ret;
3034 if (c == 0) {
3035 return(xmlStrndup(BAD_CAST "", 0));
3036 }
3037 len = 0;
3038
3039 /*
3040 * Check that the first character is proper to start
3041 * a new name
3042 */
3043 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3044 ((c >= 0x41) && (c <= 0x5A)) ||
3045 (c == '_') || (c == ':'))) {
3046 int l;
3047 int first = CUR_SCHAR(cur, l);
3048
3049 if (!IS_LETTER(first) && (first != '_')) {
3050 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3051 "Name %s is not XML Namespace compliant\n",
3052 name);
3053 }
3054 }
3055 cur++;
3056
3057 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3058 buf[len++] = c;
3059 c = *cur++;
3060 }
3061 if (len >= max) {
3062 /*
3063 * Okay someone managed to make a huge name, so he's ready to pay
3064 * for the processing speed.
3065 */
3066 max = len * 2;
3067
3068 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3069 if (buffer == NULL) {
3070 xmlErrMemory(ctxt, NULL);
3071 return(NULL);
3072 }
3073 memcpy(buffer, buf, len);
3074 while (c != 0) { /* tested bigname2.xml */
3075 if (len + 10 > max) {
3076 xmlChar *tmp;
3077
3078 max *= 2;
3079 tmp = (xmlChar *) xmlRealloc(buffer,
3080 max * sizeof(xmlChar));
3081 if (tmp == NULL) {
3082 xmlErrMemory(ctxt, NULL);
3083 xmlFree(buffer);
3084 return(NULL);
3085 }
3086 buffer = tmp;
3087 }
3088 buffer[len++] = c;
3089 c = *cur++;
3090 }
3091 buffer[len] = 0;
3092 }
3093
3094 if (buffer == NULL)
3095 ret = xmlStrndup(buf, len);
3096 else {
3097 ret = buffer;
3098 }
3099 }
3100
3101 return(ret);
3102 }
3103
3104 /************************************************************************
3105 * *
3106 * The parser itself *
3107 * Relates to http://www.w3.org/TR/REC-xml *
3108 * *
3109 ************************************************************************/
3110
3111 /************************************************************************
3112 * *
3113 * Routines to parse Name, NCName and NmToken *
3114 * *
3115 ************************************************************************/
#ifdef DEBUG
/*
 * Counters for the name parsing routines, only compiled in DEBUG builds.
 */
/* Name and NmToken */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNmToken = 0;
/* NCName */
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* names parsed from a string */
static unsigned long nbParseStringName = 0;
#endif
3124
3125 /*
3126 * The two following functions are related to the change of accepted
3127 * characters for Name and NmToken in the Revision 5 of XML-1.0
3128 * They correspond to the modified production [4] and the new production [4a]
3129 * changes in that revision. Also note that the macros used for the
3130 * productions Letter, Digit, CombiningChar and Extender are not needed
3131 * anymore.
3132 * We still keep compatibility to pre-revision5 parsing semantic if the
3133 * new XML_PARSE_OLD10 option is given to the parser.
3134 */
3135 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3136 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3137 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3138 /*
3139 * Use the new checks of production [4] [4a] amd [5] of the
3140 * Update 5 of XML-1.0
3141 */
3142 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3143 (((c >= 'a') && (c <= 'z')) ||
3144 ((c >= 'A') && (c <= 'Z')) ||
3145 (c == '_') || (c == ':') ||
3146 ((c >= 0xC0) && (c <= 0xD6)) ||
3147 ((c >= 0xD8) && (c <= 0xF6)) ||
3148 ((c >= 0xF8) && (c <= 0x2FF)) ||
3149 ((c >= 0x370) && (c <= 0x37D)) ||
3150 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3151 ((c >= 0x200C) && (c <= 0x200D)) ||
3152 ((c >= 0x2070) && (c <= 0x218F)) ||
3153 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3154 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3155 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3156 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3157 ((c >= 0x10000) && (c <= 0xEFFFF))))
3158 return(1);
3159 } else {
3160 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3161 return(1);
3162 }
3163 return(0);
3164 }
3165
3166 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3167 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3168 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3169 /*
3170 * Use the new checks of production [4] [4a] amd [5] of the
3171 * Update 5 of XML-1.0
3172 */
3173 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3174 (((c >= 'a') && (c <= 'z')) ||
3175 ((c >= 'A') && (c <= 'Z')) ||
3176 ((c >= '0') && (c <= '9')) || /* !start */
3177 (c == '_') || (c == ':') ||
3178 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3179 ((c >= 0xC0) && (c <= 0xD6)) ||
3180 ((c >= 0xD8) && (c <= 0xF6)) ||
3181 ((c >= 0xF8) && (c <= 0x2FF)) ||
3182 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3183 ((c >= 0x370) && (c <= 0x37D)) ||
3184 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185 ((c >= 0x200C) && (c <= 0x200D)) ||
3186 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3187 ((c >= 0x2070) && (c <= 0x218F)) ||
3188 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 ((c >= 0x10000) && (c <= 0xEFFFF))))
3193 return(1);
3194 } else {
3195 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3196 (c == '.') || (c == '-') ||
3197 (c == '_') || (c == ':') ||
3198 (IS_COMBINING(c)) ||
3199 (IS_EXTENDER(c)))
3200 return(1);
3201 }
3202 return(0);
3203 }
3204
3205 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3206 int *len, int *alloc, int normalize);
3207
3208 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3209 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3210 int len = 0, l;
3211 int c;
3212 int count = 0;
3213 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3214 XML_MAX_TEXT_LENGTH :
3215 XML_MAX_NAME_LENGTH;
3216
3217 #ifdef DEBUG
3218 nbParseNameComplex++;
3219 #endif
3220
3221 /*
3222 * Handler for more complex cases
3223 */
3224 GROW;
3225 if (ctxt->instate == XML_PARSER_EOF)
3226 return(NULL);
3227 c = CUR_CHAR(l);
3228 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3229 /*
3230 * Use the new checks of production [4] [4a] amd [5] of the
3231 * Update 5 of XML-1.0
3232 */
3233 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3234 (!(((c >= 'a') && (c <= 'z')) ||
3235 ((c >= 'A') && (c <= 'Z')) ||
3236 (c == '_') || (c == ':') ||
3237 ((c >= 0xC0) && (c <= 0xD6)) ||
3238 ((c >= 0xD8) && (c <= 0xF6)) ||
3239 ((c >= 0xF8) && (c <= 0x2FF)) ||
3240 ((c >= 0x370) && (c <= 0x37D)) ||
3241 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3242 ((c >= 0x200C) && (c <= 0x200D)) ||
3243 ((c >= 0x2070) && (c <= 0x218F)) ||
3244 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3245 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3246 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3247 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3248 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3249 return(NULL);
3250 }
3251 len += l;
3252 NEXTL(l);
3253 c = CUR_CHAR(l);
3254 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3255 (((c >= 'a') && (c <= 'z')) ||
3256 ((c >= 'A') && (c <= 'Z')) ||
3257 ((c >= '0') && (c <= '9')) || /* !start */
3258 (c == '_') || (c == ':') ||
3259 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3260 ((c >= 0xC0) && (c <= 0xD6)) ||
3261 ((c >= 0xD8) && (c <= 0xF6)) ||
3262 ((c >= 0xF8) && (c <= 0x2FF)) ||
3263 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3264 ((c >= 0x370) && (c <= 0x37D)) ||
3265 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3266 ((c >= 0x200C) && (c <= 0x200D)) ||
3267 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3268 ((c >= 0x2070) && (c <= 0x218F)) ||
3269 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3270 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3271 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3272 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3273 ((c >= 0x10000) && (c <= 0xEFFFF))
3274 )) {
3275 if (count++ > XML_PARSER_CHUNK_SIZE) {
3276 count = 0;
3277 GROW;
3278 if (ctxt->instate == XML_PARSER_EOF)
3279 return(NULL);
3280 }
3281 if (len <= INT_MAX - l)
3282 len += l;
3283 NEXTL(l);
3284 c = CUR_CHAR(l);
3285 }
3286 } else {
3287 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3288 (!IS_LETTER(c) && (c != '_') &&
3289 (c != ':'))) {
3290 return(NULL);
3291 }
3292 len += l;
3293 NEXTL(l);
3294 c = CUR_CHAR(l);
3295
3296 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3297 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
3299 (c == '_') || (c == ':') ||
3300 (IS_COMBINING(c)) ||
3301 (IS_EXTENDER(c)))) {
3302 if (count++ > XML_PARSER_CHUNK_SIZE) {
3303 count = 0;
3304 GROW;
3305 if (ctxt->instate == XML_PARSER_EOF)
3306 return(NULL);
3307 }
3308 if (len <= INT_MAX - l)
3309 len += l;
3310 NEXTL(l);
3311 c = CUR_CHAR(l);
3312 }
3313 }
3314 if (len > maxLength) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316 return(NULL);
3317 }
3318 if (ctxt->input->cur - ctxt->input->base < len) {
3319 /*
3320 * There were a couple of bugs where PERefs lead to to a change
3321 * of the buffer. Check the buffer size to avoid passing an invalid
3322 * pointer to xmlDictLookup.
3323 */
3324 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3325 "unexpected change of input buffer");
3326 return (NULL);
3327 }
3328 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3329 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3330 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3331 }
3332
3333 /**
3334 * xmlParseName:
3335 * @ctxt: an XML parser context
3336 *
3337 * parse an XML name.
3338 *
3339 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3340 * CombiningChar | Extender
3341 *
3342 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3343 *
3344 * [6] Names ::= Name (#x20 Name)*
3345 *
3346 * Returns the Name parsed or NULL
3347 */
3348
3349 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3350 xmlParseName(xmlParserCtxtPtr ctxt) {
3351 const xmlChar *in;
3352 const xmlChar *ret;
3353 size_t count = 0;
3354 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3355 XML_MAX_TEXT_LENGTH :
3356 XML_MAX_NAME_LENGTH;
3357
3358 GROW;
3359
3360 #ifdef DEBUG
3361 nbParseName++;
3362 #endif
3363
3364 /*
3365 * Accelerator for simple ASCII names
3366 */
3367 in = ctxt->input->cur;
3368 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3369 ((*in >= 0x41) && (*in <= 0x5A)) ||
3370 (*in == '_') || (*in == ':')) {
3371 in++;
3372 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 ((*in >= 0x30) && (*in <= 0x39)) ||
3375 (*in == '_') || (*in == '-') ||
3376 (*in == ':') || (*in == '.'))
3377 in++;
3378 if ((*in > 0) && (*in < 0x80)) {
3379 count = in - ctxt->input->cur;
3380 if (count > maxLength) {
3381 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3382 return(NULL);
3383 }
3384 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3385 ctxt->input->cur = in;
3386 ctxt->input->col += count;
3387 if (ret == NULL)
3388 xmlErrMemory(ctxt, NULL);
3389 return(ret);
3390 }
3391 }
3392 /* accelerator for special cases */
3393 return(xmlParseNameComplex(ctxt));
3394 }
3395
3396 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3397 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3398 int len = 0, l;
3399 int c;
3400 int count = 0;
3401 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3402 XML_MAX_TEXT_LENGTH :
3403 XML_MAX_NAME_LENGTH;
3404 size_t startPosition = 0;
3405
3406 #ifdef DEBUG
3407 nbParseNCNameComplex++;
3408 #endif
3409
3410 /*
3411 * Handler for more complex cases
3412 */
3413 GROW;
3414 startPosition = CUR_PTR - BASE_PTR;
3415 c = CUR_CHAR(l);
3416 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3417 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3418 return(NULL);
3419 }
3420
3421 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3422 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3423 if (count++ > XML_PARSER_CHUNK_SIZE) {
3424 count = 0;
3425 GROW;
3426 if (ctxt->instate == XML_PARSER_EOF)
3427 return(NULL);
3428 }
3429 if (len <= INT_MAX - l)
3430 len += l;
3431 NEXTL(l);
3432 c = CUR_CHAR(l);
3433 if (c == 0) {
3434 count = 0;
3435 /*
3436 * when shrinking to extend the buffer we really need to preserve
3437 * the part of the name we already parsed. Hence rolling back
3438 * by current length.
3439 */
3440 ctxt->input->cur -= l;
3441 GROW;
3442 if (ctxt->instate == XML_PARSER_EOF)
3443 return(NULL);
3444 ctxt->input->cur += l;
3445 c = CUR_CHAR(l);
3446 }
3447 }
3448 if (len > maxLength) {
3449 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3450 return(NULL);
3451 }
3452 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3453 }
3454
3455 /**
3456 * xmlParseNCName:
3457 * @ctxt: an XML parser context
3458 * @len: length of the string parsed
3459 *
3460 * parse an XML name.
3461 *
3462 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3463 * CombiningChar | Extender
3464 *
3465 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3466 *
3467 * Returns the Name parsed or NULL
3468 */
3469
3470 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3471 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3472 const xmlChar *in, *e;
3473 const xmlChar *ret;
3474 size_t count = 0;
3475 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3476 XML_MAX_TEXT_LENGTH :
3477 XML_MAX_NAME_LENGTH;
3478
3479 #ifdef DEBUG
3480 nbParseNCName++;
3481 #endif
3482
3483 /*
3484 * Accelerator for simple ASCII names
3485 */
3486 in = ctxt->input->cur;
3487 e = ctxt->input->end;
3488 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3489 ((*in >= 0x41) && (*in <= 0x5A)) ||
3490 (*in == '_')) && (in < e)) {
3491 in++;
3492 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3493 ((*in >= 0x41) && (*in <= 0x5A)) ||
3494 ((*in >= 0x30) && (*in <= 0x39)) ||
3495 (*in == '_') || (*in == '-') ||
3496 (*in == '.')) && (in < e))
3497 in++;
3498 if (in >= e)
3499 goto complex;
3500 if ((*in > 0) && (*in < 0x80)) {
3501 count = in - ctxt->input->cur;
3502 if (count > maxLength) {
3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504 return(NULL);
3505 }
3506 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3507 ctxt->input->cur = in;
3508 ctxt->input->col += count;
3509 if (ret == NULL) {
3510 xmlErrMemory(ctxt, NULL);
3511 }
3512 return(ret);
3513 }
3514 }
3515 complex:
3516 return(xmlParseNCNameComplex(ctxt));
3517 }
3518
3519 /**
3520 * xmlParseNameAndCompare:
3521 * @ctxt: an XML parser context
3522 *
3523 * parse an XML name and compares for match
3524 * (specialized for endtag parsing)
3525 *
3526 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3527 * and the name for mismatch
3528 */
3529
3530 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3531 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3532 register const xmlChar *cmp = other;
3533 register const xmlChar *in;
3534 const xmlChar *ret;
3535
3536 GROW;
3537 if (ctxt->instate == XML_PARSER_EOF)
3538 return(NULL);
3539
3540 in = ctxt->input->cur;
3541 while (*in != 0 && *in == *cmp) {
3542 ++in;
3543 ++cmp;
3544 }
3545 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3546 /* success */
3547 ctxt->input->col += in - ctxt->input->cur;
3548 ctxt->input->cur = in;
3549 return (const xmlChar*) 1;
3550 }
3551 /* failure (or end of input buffer), check with full function */
3552 ret = xmlParseName (ctxt);
3553 /* strings coming from the dictionary direct compare possible */
3554 if (ret == other) {
3555 return (const xmlChar*) 1;
3556 }
3557 return ret;
3558 }
3559
3560 /**
3561 * xmlParseStringName:
3562 * @ctxt: an XML parser context
3563 * @str: a pointer to the string pointer (IN/OUT)
3564 *
3565 * parse an XML name.
3566 *
3567 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3568 * CombiningChar | Extender
3569 *
3570 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3571 *
3572 * [6] Names ::= Name (#x20 Name)*
3573 *
3574 * Returns the Name parsed or NULL. The @str pointer
3575 * is updated to the current location in the string.
3576 */
3577
3578 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3579 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3580 xmlChar buf[XML_MAX_NAMELEN + 5];
3581 const xmlChar *cur = *str;
3582 int len = 0, l;
3583 int c;
3584 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3585 XML_MAX_TEXT_LENGTH :
3586 XML_MAX_NAME_LENGTH;
3587
3588 #ifdef DEBUG
3589 nbParseStringName++;
3590 #endif
3591
3592 c = CUR_SCHAR(cur, l);
3593 if (!xmlIsNameStartChar(ctxt, c)) {
3594 return(NULL);
3595 }
3596
3597 COPY_BUF(l,buf,len,c);
3598 cur += l;
3599 c = CUR_SCHAR(cur, l);
3600 while (xmlIsNameChar(ctxt, c)) {
3601 COPY_BUF(l,buf,len,c);
3602 cur += l;
3603 c = CUR_SCHAR(cur, l);
3604 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3605 /*
3606 * Okay someone managed to make a huge name, so he's ready to pay
3607 * for the processing speed.
3608 */
3609 xmlChar *buffer;
3610 int max = len * 2;
3611
3612 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3613 if (buffer == NULL) {
3614 xmlErrMemory(ctxt, NULL);
3615 return(NULL);
3616 }
3617 memcpy(buffer, buf, len);
3618 while (xmlIsNameChar(ctxt, c)) {
3619 if (len + 10 > max) {
3620 xmlChar *tmp;
3621
3622 max *= 2;
3623 tmp = (xmlChar *) xmlRealloc(buffer,
3624 max * sizeof(xmlChar));
3625 if (tmp == NULL) {
3626 xmlErrMemory(ctxt, NULL);
3627 xmlFree(buffer);
3628 return(NULL);
3629 }
3630 buffer = tmp;
3631 }
3632 COPY_BUF(l,buffer,len,c);
3633 cur += l;
3634 c = CUR_SCHAR(cur, l);
3635 if (len > maxLength) {
3636 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3637 xmlFree(buffer);
3638 return(NULL);
3639 }
3640 }
3641 buffer[len] = 0;
3642 *str = cur;
3643 return(buffer);
3644 }
3645 }
3646 if (len > maxLength) {
3647 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3648 return(NULL);
3649 }
3650 *str = cur;
3651 return(xmlStrndup(buf, len));
3652 }
3653
3654 /**
3655 * xmlParseNmtoken:
3656 * @ctxt: an XML parser context
3657 *
3658 * parse an XML Nmtoken.
3659 *
3660 * [7] Nmtoken ::= (NameChar)+
3661 *
3662 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3663 *
3664 * Returns the Nmtoken parsed or NULL
3665 */
3666
3667 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3668 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3669 xmlChar buf[XML_MAX_NAMELEN + 5];
3670 int len = 0, l;
3671 int c;
3672 int count = 0;
3673 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3674 XML_MAX_TEXT_LENGTH :
3675 XML_MAX_NAME_LENGTH;
3676
3677 #ifdef DEBUG
3678 nbParseNmToken++;
3679 #endif
3680
3681 GROW;
3682 if (ctxt->instate == XML_PARSER_EOF)
3683 return(NULL);
3684 c = CUR_CHAR(l);
3685
3686 while (xmlIsNameChar(ctxt, c)) {
3687 if (count++ > XML_PARSER_CHUNK_SIZE) {
3688 count = 0;
3689 GROW;
3690 }
3691 COPY_BUF(l,buf,len,c);
3692 NEXTL(l);
3693 c = CUR_CHAR(l);
3694 if (c == 0) {
3695 count = 0;
3696 GROW;
3697 if (ctxt->instate == XML_PARSER_EOF)
3698 return(NULL);
3699 c = CUR_CHAR(l);
3700 }
3701 if (len >= XML_MAX_NAMELEN) {
3702 /*
3703 * Okay someone managed to make a huge token, so he's ready to pay
3704 * for the processing speed.
3705 */
3706 xmlChar *buffer;
3707 int max = len * 2;
3708
3709 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3710 if (buffer == NULL) {
3711 xmlErrMemory(ctxt, NULL);
3712 return(NULL);
3713 }
3714 memcpy(buffer, buf, len);
3715 while (xmlIsNameChar(ctxt, c)) {
3716 if (count++ > XML_PARSER_CHUNK_SIZE) {
3717 count = 0;
3718 GROW;
3719 if (ctxt->instate == XML_PARSER_EOF) {
3720 xmlFree(buffer);
3721 return(NULL);
3722 }
3723 }
3724 if (len + 10 > max) {
3725 xmlChar *tmp;
3726
3727 max *= 2;
3728 tmp = (xmlChar *) xmlRealloc(buffer,
3729 max * sizeof(xmlChar));
3730 if (tmp == NULL) {
3731 xmlErrMemory(ctxt, NULL);
3732 xmlFree(buffer);
3733 return(NULL);
3734 }
3735 buffer = tmp;
3736 }
3737 COPY_BUF(l,buffer,len,c);
3738 NEXTL(l);
3739 c = CUR_CHAR(l);
3740 if (len > maxLength) {
3741 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3742 xmlFree(buffer);
3743 return(NULL);
3744 }
3745 }
3746 buffer[len] = 0;
3747 return(buffer);
3748 }
3749 }
3750 if (len == 0)
3751 return(NULL);
3752 if (len > maxLength) {
3753 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3754 return(NULL);
3755 }
3756 return(xmlStrndup(buf, len));
3757 }
3758
3759 /**
3760 * xmlParseEntityValue:
3761 * @ctxt: an XML parser context
3762 * @orig: if non-NULL store a copy of the original entity value
3763 *
3764 * parse a value for ENTITY declarations
3765 *
3766 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3767 * "'" ([^%&'] | PEReference | Reference)* "'"
3768 *
3769 * Returns the EntityValue parsed with reference substituted or NULL
3770 */
3771
3772 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3773 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3774 xmlChar *buf = NULL;
3775 int len = 0;
3776 int size = XML_PARSER_BUFFER_SIZE;
3777 int c, l;
3778 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3779 XML_MAX_HUGE_LENGTH :
3780 XML_MAX_TEXT_LENGTH;
3781 xmlChar stop;
3782 xmlChar *ret = NULL;
3783 const xmlChar *cur = NULL;
3784 xmlParserInputPtr input;
3785
3786 if (RAW == '"') stop = '"';
3787 else if (RAW == '\'') stop = '\'';
3788 else {
3789 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3790 return(NULL);
3791 }
3792 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3793 if (buf == NULL) {
3794 xmlErrMemory(ctxt, NULL);
3795 return(NULL);
3796 }
3797
3798 /*
3799 * The content of the entity definition is copied in a buffer.
3800 */
3801
3802 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3803 input = ctxt->input;
3804 GROW;
3805 if (ctxt->instate == XML_PARSER_EOF)
3806 goto error;
3807 NEXT;
3808 c = CUR_CHAR(l);
3809 /*
3810 * NOTE: 4.4.5 Included in Literal
3811 * When a parameter entity reference appears in a literal entity
3812 * value, ... a single or double quote character in the replacement
3813 * text is always treated as a normal data character and will not
3814 * terminate the literal.
3815 * In practice it means we stop the loop only when back at parsing
3816 * the initial entity and the quote is found
3817 */
3818 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3819 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3820 if (len + 5 >= size) {
3821 xmlChar *tmp;
3822
3823 size *= 2;
3824 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3825 if (tmp == NULL) {
3826 xmlErrMemory(ctxt, NULL);
3827 goto error;
3828 }
3829 buf = tmp;
3830 }
3831 COPY_BUF(l,buf,len,c);
3832 NEXTL(l);
3833
3834 GROW;
3835 c = CUR_CHAR(l);
3836 if (c == 0) {
3837 GROW;
3838 c = CUR_CHAR(l);
3839 }
3840
3841 if (len > maxLength) {
3842 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3843 "entity value too long\n");
3844 goto error;
3845 }
3846 }
3847 buf[len] = 0;
3848 if (ctxt->instate == XML_PARSER_EOF)
3849 goto error;
3850 if (c != stop) {
3851 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3852 goto error;
3853 }
3854 NEXT;
3855
3856 /*
3857 * Raise problem w.r.t. '&' and '%' being used in non-entities
3858 * reference constructs. Note Charref will be handled in
3859 * xmlStringDecodeEntities()
3860 */
3861 cur = buf;
3862 while (*cur != 0) { /* non input consuming */
3863 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3864 xmlChar *name;
3865 xmlChar tmp = *cur;
3866 int nameOk = 0;
3867
3868 cur++;
3869 name = xmlParseStringName(ctxt, &cur);
3870 if (name != NULL) {
3871 nameOk = 1;
3872 xmlFree(name);
3873 }
3874 if ((nameOk == 0) || (*cur != ';')) {
3875 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3876 "EntityValue: '%c' forbidden except for entities references\n",
3877 tmp);
3878 goto error;
3879 }
3880 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3881 (ctxt->inputNr == 1)) {
3882 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3883 goto error;
3884 }
3885 if (*cur == 0)
3886 break;
3887 }
3888 cur++;
3889 }
3890
3891 /*
3892 * Then PEReference entities are substituted.
3893 *
3894 * NOTE: 4.4.7 Bypassed
3895 * When a general entity reference appears in the EntityValue in
3896 * an entity declaration, it is bypassed and left as is.
3897 * so XML_SUBSTITUTE_REF is not set here.
3898 */
3899 ++ctxt->depth;
3900 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3901 0, 0, 0);
3902 --ctxt->depth;
3903 if (orig != NULL) {
3904 *orig = buf;
3905 buf = NULL;
3906 }
3907
3908 error:
3909 if (buf != NULL)
3910 xmlFree(buf);
3911 return(ret);
3912 }
3913
3914 /**
3915 * xmlParseAttValueComplex:
3916 * @ctxt: an XML parser context
3917 * @len: the resulting attribute len
3918 * @normalize: whether to apply the inner normalization
3919 *
3920 * parse a value for an attribute, this is the fallback function
3921 * of xmlParseAttValue() when the attribute parsing requires handling
3922 * of non-ASCII characters, or normalization compaction.
3923 *
3924 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3925 */
3926 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3927 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3928 xmlChar limit = 0;
3929 xmlChar *buf = NULL;
3930 xmlChar *rep = NULL;
3931 size_t len = 0;
3932 size_t buf_size = 0;
3933 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3934 XML_MAX_HUGE_LENGTH :
3935 XML_MAX_TEXT_LENGTH;
3936 int c, l, in_space = 0;
3937 xmlChar *current = NULL;
3938 xmlEntityPtr ent;
3939
3940 if (NXT(0) == '"') {
3941 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3942 limit = '"';
3943 NEXT;
3944 } else if (NXT(0) == '\'') {
3945 limit = '\'';
3946 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3947 NEXT;
3948 } else {
3949 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3950 return(NULL);
3951 }
3952
3953 /*
3954 * allocate a translation buffer.
3955 */
3956 buf_size = XML_PARSER_BUFFER_SIZE;
3957 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3958 if (buf == NULL) goto mem_error;
3959
3960 /*
3961 * OK loop until we reach one of the ending char or a size limit.
3962 */
3963 c = CUR_CHAR(l);
3964 while (((NXT(0) != limit) && /* checked */
3965 (IS_CHAR(c)) && (c != '<')) &&
3966 (ctxt->instate != XML_PARSER_EOF)) {
3967 if (c == '&') {
3968 in_space = 0;
3969 if (NXT(1) == '#') {
3970 int val = xmlParseCharRef(ctxt);
3971
3972 if (val == '&') {
3973 if (ctxt->replaceEntities) {
3974 if (len + 10 > buf_size) {
3975 growBuffer(buf, 10);
3976 }
3977 buf[len++] = '&';
3978 } else {
3979 /*
3980 * The reparsing will be done in xmlStringGetNodeList()
3981 * called by the attribute() function in SAX.c
3982 */
3983 if (len + 10 > buf_size) {
3984 growBuffer(buf, 10);
3985 }
3986 buf[len++] = '&';
3987 buf[len++] = '#';
3988 buf[len++] = '3';
3989 buf[len++] = '8';
3990 buf[len++] = ';';
3991 }
3992 } else if (val != 0) {
3993 if (len + 10 > buf_size) {
3994 growBuffer(buf, 10);
3995 }
3996 len += xmlCopyChar(0, &buf[len], val);
3997 }
3998 } else {
3999 ent = xmlParseEntityRef(ctxt);
4000 ctxt->nbentities++;
4001 if (ent != NULL)
4002 ctxt->nbentities += ent->owner;
4003 if ((ent != NULL) &&
4004 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4005 if (len + 10 > buf_size) {
4006 growBuffer(buf, 10);
4007 }
4008 if ((ctxt->replaceEntities == 0) &&
4009 (ent->content[0] == '&')) {
4010 buf[len++] = '&';
4011 buf[len++] = '#';
4012 buf[len++] = '3';
4013 buf[len++] = '8';
4014 buf[len++] = ';';
4015 } else {
4016 buf[len++] = ent->content[0];
4017 }
4018 } else if ((ent != NULL) &&
4019 (ctxt->replaceEntities != 0)) {
4020 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4021 ++ctxt->depth;
4022 rep = xmlStringDecodeEntities(ctxt, ent->content,
4023 XML_SUBSTITUTE_REF,
4024 0, 0, 0);
4025 --ctxt->depth;
4026 if (rep != NULL) {
4027 current = rep;
4028 while (*current != 0) { /* non input consuming */
4029 if ((*current == 0xD) || (*current == 0xA) ||
4030 (*current == 0x9)) {
4031 buf[len++] = 0x20;
4032 current++;
4033 } else
4034 buf[len++] = *current++;
4035 if (len + 10 > buf_size) {
4036 growBuffer(buf, 10);
4037 }
4038 }
4039 xmlFree(rep);
4040 rep = NULL;
4041 }
4042 } else {
4043 if (len + 10 > buf_size) {
4044 growBuffer(buf, 10);
4045 }
4046 if (ent->content != NULL)
4047 buf[len++] = ent->content[0];
4048 }
4049 } else if (ent != NULL) {
4050 int i = xmlStrlen(ent->name);
4051 const xmlChar *cur = ent->name;
4052
4053 /*
4054 * This may look absurd but is needed to detect
4055 * entities problems
4056 */
4057 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4058 (ent->content != NULL) && (ent->checked == 0)) {
4059 unsigned long oldnbent = ctxt->nbentities, diff;
4060
4061 ++ctxt->depth;
4062 rep = xmlStringDecodeEntities(ctxt, ent->content,
4063 XML_SUBSTITUTE_REF, 0, 0, 0);
4064 --ctxt->depth;
4065
4066 diff = ctxt->nbentities - oldnbent + 1;
4067 if (diff > INT_MAX / 2)
4068 diff = INT_MAX / 2;
4069 ent->checked = diff * 2;
4070 if (rep != NULL) {
4071 if (xmlStrchr(rep, '<'))
4072 ent->checked |= 1;
4073 xmlFree(rep);
4074 rep = NULL;
4075 } else {
4076 ent->content[0] = 0;
4077 }
4078 }
4079
4080 /*
4081 * Just output the reference
4082 */
4083 buf[len++] = '&';
4084 while (len + i + 10 > buf_size) {
4085 growBuffer(buf, i + 10);
4086 }
4087 for (;i > 0;i--)
4088 buf[len++] = *cur++;
4089 buf[len++] = ';';
4090 }
4091 }
4092 } else {
4093 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4094 if ((len != 0) || (!normalize)) {
4095 if ((!normalize) || (!in_space)) {
4096 COPY_BUF(l,buf,len,0x20);
4097 while (len + 10 > buf_size) {
4098 growBuffer(buf, 10);
4099 }
4100 }
4101 in_space = 1;
4102 }
4103 } else {
4104 in_space = 0;
4105 COPY_BUF(l,buf,len,c);
4106 if (len + 10 > buf_size) {
4107 growBuffer(buf, 10);
4108 }
4109 }
4110 NEXTL(l);
4111 }
4112 GROW;
4113 c = CUR_CHAR(l);
4114 if (len > maxLength) {
4115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116 "AttValue length too long\n");
4117 goto mem_error;
4118 }
4119 }
4120 if (ctxt->instate == XML_PARSER_EOF)
4121 goto error;
4122
4123 if ((in_space) && (normalize)) {
4124 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4125 }
4126 buf[len] = 0;
4127 if (RAW == '<') {
4128 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4129 } else if (RAW != limit) {
4130 if ((c != 0) && (!IS_CHAR(c))) {
4131 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4132 "invalid character in attribute value\n");
4133 } else {
4134 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4135 "AttValue: ' expected\n");
4136 }
4137 } else
4138 NEXT;
4139
4140 if (attlen != NULL) *attlen = (int) len;
4141 return(buf);
4142
4143 mem_error:
4144 xmlErrMemory(ctxt, NULL);
4145 error:
4146 if (buf != NULL)
4147 xmlFree(buf);
4148 if (rep != NULL)
4149 xmlFree(rep);
4150 return(NULL);
4151 }
4152
4153 /**
4154 * xmlParseAttValue:
4155 * @ctxt: an XML parser context
4156 *
4157 * parse a value for an attribute
4158 * Note: the parser won't do substitution of entities here, this
4159 * will be handled later in xmlStringGetNodeList
4160 *
4161 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4162 * "'" ([^<&'] | Reference)* "'"
4163 *
4164 * 3.3.3 Attribute-Value Normalization:
4165 * Before the value of an attribute is passed to the application or
4166 * checked for validity, the XML processor must normalize it as follows:
4167 * - a character reference is processed by appending the referenced
4168 * character to the attribute value
4169 * - an entity reference is processed by recursively processing the
4170 * replacement text of the entity
4171 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4172 * appending #x20 to the normalized value, except that only a single
4173 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4174 * parsed entity or the literal entity value of an internal parsed entity
4175 * - other characters are processed by appending them to the normalized value
4176 * If the declared value is not CDATA, then the XML processor must further
4177 * process the normalized attribute value by discarding any leading and
4178 * trailing space (#x20) characters, and by replacing sequences of space
4179 * (#x20) characters by a single space (#x20) character.
4180 * All attributes for which no declaration has been read should be treated
4181 * by a non-validating parser as if declared CDATA.
4182 *
4183 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4184 */
4185
4186
4187 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4188 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4189 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4190 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4191 }
4192
4193 /**
4194 * xmlParseSystemLiteral:
4195 * @ctxt: an XML parser context
4196 *
4197 * parse an XML Literal
4198 *
4199 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4200 *
4201 * Returns the SystemLiteral parsed or NULL
4202 */
4203
4204 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4205 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4206 xmlChar *buf = NULL;
4207 int len = 0;
4208 int size = XML_PARSER_BUFFER_SIZE;
4209 int cur, l;
4210 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4211 XML_MAX_TEXT_LENGTH :
4212 XML_MAX_NAME_LENGTH;
4213 xmlChar stop;
4214 int state = ctxt->instate;
4215 int count = 0;
4216
4217 SHRINK;
4218 if (RAW == '"') {
4219 NEXT;
4220 stop = '"';
4221 } else if (RAW == '\'') {
4222 NEXT;
4223 stop = '\'';
4224 } else {
4225 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4226 return(NULL);
4227 }
4228
4229 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4230 if (buf == NULL) {
4231 xmlErrMemory(ctxt, NULL);
4232 return(NULL);
4233 }
4234 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4235 cur = CUR_CHAR(l);
4236 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4237 if (len + 5 >= size) {
4238 xmlChar *tmp;
4239
4240 size *= 2;
4241 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4242 if (tmp == NULL) {
4243 xmlFree(buf);
4244 xmlErrMemory(ctxt, NULL);
4245 ctxt->instate = (xmlParserInputState) state;
4246 return(NULL);
4247 }
4248 buf = tmp;
4249 }
4250 count++;
4251 if (count > 50) {
4252 SHRINK;
4253 GROW;
4254 count = 0;
4255 if (ctxt->instate == XML_PARSER_EOF) {
4256 xmlFree(buf);
4257 return(NULL);
4258 }
4259 }
4260 COPY_BUF(l,buf,len,cur);
4261 NEXTL(l);
4262 cur = CUR_CHAR(l);
4263 if (cur == 0) {
4264 GROW;
4265 SHRINK;
4266 cur = CUR_CHAR(l);
4267 }
4268 if (len > maxLength) {
4269 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4270 xmlFree(buf);
4271 ctxt->instate = (xmlParserInputState) state;
4272 return(NULL);
4273 }
4274 }
4275 buf[len] = 0;
4276 ctxt->instate = (xmlParserInputState) state;
4277 if (!IS_CHAR(cur)) {
4278 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4279 } else {
4280 NEXT;
4281 }
4282 return(buf);
4283 }
4284
4285 /**
4286 * xmlParsePubidLiteral:
4287 * @ctxt: an XML parser context
4288 *
4289 * parse an XML public literal
4290 *
4291 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4292 *
4293 * Returns the PubidLiteral parsed or NULL.
4294 */
4295
4296 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4297 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4298 xmlChar *buf = NULL;
4299 int len = 0;
4300 int size = XML_PARSER_BUFFER_SIZE;
4301 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4302 XML_MAX_TEXT_LENGTH :
4303 XML_MAX_NAME_LENGTH;
4304 xmlChar cur;
4305 xmlChar stop;
4306 int count = 0;
4307 xmlParserInputState oldstate = ctxt->instate;
4308
4309 SHRINK;
4310 if (RAW == '"') {
4311 NEXT;
4312 stop = '"';
4313 } else if (RAW == '\'') {
4314 NEXT;
4315 stop = '\'';
4316 } else {
4317 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4318 return(NULL);
4319 }
4320 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4321 if (buf == NULL) {
4322 xmlErrMemory(ctxt, NULL);
4323 return(NULL);
4324 }
4325 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4326 cur = CUR;
4327 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4328 if (len + 1 >= size) {
4329 xmlChar *tmp;
4330
4331 size *= 2;
4332 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4333 if (tmp == NULL) {
4334 xmlErrMemory(ctxt, NULL);
4335 xmlFree(buf);
4336 return(NULL);
4337 }
4338 buf = tmp;
4339 }
4340 buf[len++] = cur;
4341 count++;
4342 if (count > 50) {
4343 SHRINK;
4344 GROW;
4345 count = 0;
4346 if (ctxt->instate == XML_PARSER_EOF) {
4347 xmlFree(buf);
4348 return(NULL);
4349 }
4350 }
4351 NEXT;
4352 cur = CUR;
4353 if (cur == 0) {
4354 GROW;
4355 SHRINK;
4356 cur = CUR;
4357 }
4358 if (len > maxLength) {
4359 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4360 xmlFree(buf);
4361 return(NULL);
4362 }
4363 }
4364 buf[len] = 0;
4365 if (cur != stop) {
4366 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367 } else {
4368 NEXT;
4369 }
4370 ctxt->instate = oldstate;
4371 return(buf);
4372 }
4373
4374 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4375
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value, one row per high nibble. An entry
 * holds the byte value itself for 0x09 and for 0x20..0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D); every other entry, including
 * CR, LF and all bytes >= 0x80 (non-ascii), is 0x00.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20, '&' cleared */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30, '<' cleared */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50, ']' cleared */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4413
4414 /**
4415 * xmlParseCharData:
4416 * @ctxt: an XML parser context
4417 * @cdata: int indicating whether we are within a CDATA section
4418 *
4419 * parse a CharData section.
4420 * if we are within a CDATA section ']]>' marks an end of section.
4421 *
4422 * The right angle bracket (>) may be represented using the string ">",
4423 * and must, for compatibility, be escaped using ">" or a character
4424 * reference when it appears in the string "]]>" in content, when that
4425 * string is not marking the end of a CDATA section.
4426 *
4427 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4428 */
4429
4430 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4431 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4432 const xmlChar *in;
4433 int nbchar = 0;
4434 int line = ctxt->input->line;
4435 int col = ctxt->input->col;
4436 int ccol;
4437
4438 SHRINK;
4439 GROW;
4440 /*
4441 * Accelerated common case where input don't need to be
4442 * modified before passing it to the handler.
4443 */
4444 if (!cdata) {
4445 in = ctxt->input->cur;
4446 do {
4447 get_more_space:
4448 while (*in == 0x20) { in++; ctxt->input->col++; }
4449 if (*in == 0xA) {
4450 do {
4451 ctxt->input->line++; ctxt->input->col = 1;
4452 in++;
4453 } while (*in == 0xA);
4454 goto get_more_space;
4455 }
4456 if (*in == '<') {
4457 nbchar = in - ctxt->input->cur;
4458 if (nbchar > 0) {
4459 const xmlChar *tmp = ctxt->input->cur;
4460 ctxt->input->cur = in;
4461
4462 if ((ctxt->sax != NULL) &&
4463 (ctxt->sax->ignorableWhitespace !=
4464 ctxt->sax->characters)) {
4465 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4466 if (ctxt->sax->ignorableWhitespace != NULL)
4467 ctxt->sax->ignorableWhitespace(ctxt->userData,
4468 tmp, nbchar);
4469 } else {
4470 if (ctxt->sax->characters != NULL)
4471 ctxt->sax->characters(ctxt->userData,
4472 tmp, nbchar);
4473 if (*ctxt->space == -1)
4474 *ctxt->space = -2;
4475 }
4476 } else if ((ctxt->sax != NULL) &&
4477 (ctxt->sax->characters != NULL)) {
4478 ctxt->sax->characters(ctxt->userData,
4479 tmp, nbchar);
4480 }
4481 }
4482 return;
4483 }
4484
4485 get_more:
4486 ccol = ctxt->input->col;
4487 while (test_char_data[*in]) {
4488 in++;
4489 ccol++;
4490 }
4491 ctxt->input->col = ccol;
4492 if (*in == 0xA) {
4493 do {
4494 ctxt->input->line++; ctxt->input->col = 1;
4495 in++;
4496 } while (*in == 0xA);
4497 goto get_more;
4498 }
4499 if (*in == ']') {
4500 if ((in[1] == ']') && (in[2] == '>')) {
4501 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4502 ctxt->input->cur = in + 1;
4503 return;
4504 }
4505 in++;
4506 ctxt->input->col++;
4507 goto get_more;
4508 }
4509 nbchar = in - ctxt->input->cur;
4510 if (nbchar > 0) {
4511 if ((ctxt->sax != NULL) &&
4512 (ctxt->sax->ignorableWhitespace !=
4513 ctxt->sax->characters) &&
4514 (IS_BLANK_CH(*ctxt->input->cur))) {
4515 const xmlChar *tmp = ctxt->input->cur;
4516 ctxt->input->cur = in;
4517
4518 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4519 if (ctxt->sax->ignorableWhitespace != NULL)
4520 ctxt->sax->ignorableWhitespace(ctxt->userData,
4521 tmp, nbchar);
4522 } else {
4523 if (ctxt->sax->characters != NULL)
4524 ctxt->sax->characters(ctxt->userData,
4525 tmp, nbchar);
4526 if (*ctxt->space == -1)
4527 *ctxt->space = -2;
4528 }
4529 line = ctxt->input->line;
4530 col = ctxt->input->col;
4531 } else if (ctxt->sax != NULL) {
4532 if (ctxt->sax->characters != NULL)
4533 ctxt->sax->characters(ctxt->userData,
4534 ctxt->input->cur, nbchar);
4535 line = ctxt->input->line;
4536 col = ctxt->input->col;
4537 }
4538 /* something really bad happened in the SAX callback */
4539 if (ctxt->instate != XML_PARSER_CONTENT)
4540 return;
4541 }
4542 ctxt->input->cur = in;
4543 if (*in == 0xD) {
4544 in++;
4545 if (*in == 0xA) {
4546 ctxt->input->cur = in;
4547 in++;
4548 ctxt->input->line++; ctxt->input->col = 1;
4549 continue; /* while */
4550 }
4551 in--;
4552 }
4553 if (*in == '<') {
4554 return;
4555 }
4556 if (*in == '&') {
4557 return;
4558 }
4559 SHRINK;
4560 GROW;
4561 if (ctxt->instate == XML_PARSER_EOF)
4562 return;
4563 in = ctxt->input->cur;
4564 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4565 nbchar = 0;
4566 }
4567 ctxt->input->line = line;
4568 ctxt->input->col = col;
4569 xmlParseCharDataComplex(ctxt, cdata);
4570 }
4571
4572 /**
4573 * xmlParseCharDataComplex:
4574 * @ctxt: an XML parser context
4575 * @cdata: int indicating whether we are within a CDATA section
4576 *
4577 * parse a CharData section.this is the fallback function
4578 * of xmlParseCharData() when the parsing requires handling
4579 * of non-ASCII characters.
4580 */
4581 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4582 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4583 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4584 int nbchar = 0;
4585 int cur, l;
4586 int count = 0;
4587
4588 SHRINK;
4589 GROW;
4590 cur = CUR_CHAR(l);
4591 while ((cur != '<') && /* checked */
4592 (cur != '&') &&
4593 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4594 if ((cur == ']') && (NXT(1) == ']') &&
4595 (NXT(2) == '>')) {
4596 if (cdata) break;
4597 else {
4598 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4599 }
4600 }
4601 COPY_BUF(l,buf,nbchar,cur);
4602 /* move current position before possible calling of ctxt->sax->characters */
4603 NEXTL(l);
4604 cur = CUR_CHAR(l);
4605 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4606 buf[nbchar] = 0;
4607
4608 /*
4609 * OK the segment is to be consumed as chars.
4610 */
4611 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4612 if (areBlanks(ctxt, buf, nbchar, 0)) {
4613 if (ctxt->sax->ignorableWhitespace != NULL)
4614 ctxt->sax->ignorableWhitespace(ctxt->userData,
4615 buf, nbchar);
4616 } else {
4617 if (ctxt->sax->characters != NULL)
4618 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4619 if ((ctxt->sax->characters !=
4620 ctxt->sax->ignorableWhitespace) &&
4621 (*ctxt->space == -1))
4622 *ctxt->space = -2;
4623 }
4624 }
4625 nbchar = 0;
4626 /* something really bad happened in the SAX callback */
4627 if (ctxt->instate != XML_PARSER_CONTENT)
4628 return;
4629 }
4630 count++;
4631 if (count > 50) {
4632 SHRINK;
4633 GROW;
4634 count = 0;
4635 if (ctxt->instate == XML_PARSER_EOF)
4636 return;
4637 }
4638 }
4639 if (nbchar != 0) {
4640 buf[nbchar] = 0;
4641 /*
4642 * OK the segment is to be consumed as chars.
4643 */
4644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4645 if (areBlanks(ctxt, buf, nbchar, 0)) {
4646 if (ctxt->sax->ignorableWhitespace != NULL)
4647 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4648 } else {
4649 if (ctxt->sax->characters != NULL)
4650 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4651 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4652 (*ctxt->space == -1))
4653 *ctxt->space = -2;
4654 }
4655 }
4656 }
4657 if ((cur != 0) && (!IS_CHAR(cur))) {
4658 /* Generate the error and skip the offending character */
4659 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4660 "PCDATA invalid Char value %d\n",
4661 cur);
4662 NEXTL(l);
4663 }
4664 }
4665
4666 /**
4667 * xmlParseExternalID:
4668 * @ctxt: an XML parser context
4669 * @publicID: a xmlChar** receiving PubidLiteral
4670 * @strict: indicate whether we should restrict parsing to only
4671 * production [75], see NOTE below
4672 *
4673 * Parse an External ID or a Public ID
4674 *
4675 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4676 * 'PUBLIC' S PubidLiteral S SystemLiteral
4677 *
4678 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4679 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4680 *
4681 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4682 *
4683 * Returns the function returns SystemLiteral and in the second
4684 * case publicID receives PubidLiteral, is strict is off
4685 * it is possible to return NULL and have publicID set.
4686 */
4687
4688 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4689 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4690 xmlChar *URI = NULL;
4691
4692 SHRINK;
4693
4694 *publicID = NULL;
4695 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4696 SKIP(6);
4697 if (SKIP_BLANKS == 0) {
4698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4699 "Space required after 'SYSTEM'\n");
4700 }
4701 URI = xmlParseSystemLiteral(ctxt);
4702 if (URI == NULL) {
4703 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4704 }
4705 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4706 SKIP(6);
4707 if (SKIP_BLANKS == 0) {
4708 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4709 "Space required after 'PUBLIC'\n");
4710 }
4711 *publicID = xmlParsePubidLiteral(ctxt);
4712 if (*publicID == NULL) {
4713 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4714 }
4715 if (strict) {
4716 /*
4717 * We don't handle [83] so "S SystemLiteral" is required.
4718 */
4719 if (SKIP_BLANKS == 0) {
4720 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4721 "Space required after the Public Identifier\n");
4722 }
4723 } else {
4724 /*
4725 * We handle [83] so we return immediately, if
4726 * "S SystemLiteral" is not detected. We skip blanks if no
4727 * system literal was found, but this is harmless since we must
4728 * be at the end of a NotationDecl.
4729 */
4730 if (SKIP_BLANKS == 0) return(NULL);
4731 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4732 }
4733 URI = xmlParseSystemLiteral(ctxt);
4734 if (URI == NULL) {
4735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4736 }
4737 }
4738 return(URI);
4739 }
4740
4741 /**
4742 * xmlParseCommentComplex:
4743 * @ctxt: an XML parser context
4744 * @buf: the already parsed part of the buffer
4745 * @len: number of bytes in the buffer
4746 * @size: allocated size of the buffer
4747 *
4748 * Skip an XML (SGML) comment <!-- .... -->
4749 * The spec says that "For compatibility, the string "--" (double-hyphen)
4750 * must not occur within comments. "
4751 * This is the slow routine in case the accelerator for ascii didn't work
4752 *
4753 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4754 */
4755 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4756 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4757 size_t len, size_t size) {
4758 int q, ql;
4759 int r, rl;
4760 int cur, l;
4761 size_t count = 0;
4762 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4763 XML_MAX_HUGE_LENGTH :
4764 XML_MAX_TEXT_LENGTH;
4765 int inputid;
4766
4767 inputid = ctxt->input->id;
4768
4769 if (buf == NULL) {
4770 len = 0;
4771 size = XML_PARSER_BUFFER_SIZE;
4772 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4773 if (buf == NULL) {
4774 xmlErrMemory(ctxt, NULL);
4775 return;
4776 }
4777 }
4778 GROW; /* Assure there's enough input data */
4779 q = CUR_CHAR(ql);
4780 if (q == 0)
4781 goto not_terminated;
4782 if (!IS_CHAR(q)) {
4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784 "xmlParseComment: invalid xmlChar value %d\n",
4785 q);
4786 xmlFree (buf);
4787 return;
4788 }
4789 NEXTL(ql);
4790 r = CUR_CHAR(rl);
4791 if (r == 0)
4792 goto not_terminated;
4793 if (!IS_CHAR(r)) {
4794 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4795 "xmlParseComment: invalid xmlChar value %d\n",
4796 q);
4797 xmlFree (buf);
4798 return;
4799 }
4800 NEXTL(rl);
4801 cur = CUR_CHAR(l);
4802 if (cur == 0)
4803 goto not_terminated;
4804 while (IS_CHAR(cur) && /* checked */
4805 ((cur != '>') ||
4806 (r != '-') || (q != '-'))) {
4807 if ((r == '-') && (q == '-')) {
4808 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4809 }
4810 if (len + 5 >= size) {
4811 xmlChar *new_buf;
4812 size_t new_size;
4813
4814 new_size = size * 2;
4815 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4816 if (new_buf == NULL) {
4817 xmlFree (buf);
4818 xmlErrMemory(ctxt, NULL);
4819 return;
4820 }
4821 buf = new_buf;
4822 size = new_size;
4823 }
4824 COPY_BUF(ql,buf,len,q);
4825 q = r;
4826 ql = rl;
4827 r = cur;
4828 rl = l;
4829
4830 count++;
4831 if (count > 50) {
4832 SHRINK;
4833 GROW;
4834 count = 0;
4835 if (ctxt->instate == XML_PARSER_EOF) {
4836 xmlFree(buf);
4837 return;
4838 }
4839 }
4840 NEXTL(l);
4841 cur = CUR_CHAR(l);
4842 if (cur == 0) {
4843 SHRINK;
4844 GROW;
4845 cur = CUR_CHAR(l);
4846 }
4847
4848 if (len > maxLength) {
4849 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4850 "Comment too big found", NULL);
4851 xmlFree (buf);
4852 return;
4853 }
4854 }
4855 buf[len] = 0;
4856 if (cur == 0) {
4857 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4858 "Comment not terminated \n<!--%.50s\n", buf);
4859 } else if (!IS_CHAR(cur)) {
4860 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4861 "xmlParseComment: invalid xmlChar value %d\n",
4862 cur);
4863 } else {
4864 if (inputid != ctxt->input->id) {
4865 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4866 "Comment doesn't start and stop in the same"
4867 " entity\n");
4868 }
4869 NEXT;
4870 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4871 (!ctxt->disableSAX))
4872 ctxt->sax->comment(ctxt->userData, buf);
4873 }
4874 xmlFree(buf);
4875 return;
4876 not_terminated:
4877 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878 "Comment not terminated\n", NULL);
4879 xmlFree(buf);
4880 return;
4881 }
4882
4883 /**
4884 * xmlParseComment:
4885 * @ctxt: an XML parser context
4886 *
4887 * Skip an XML (SGML) comment <!-- .... -->
4888 * The spec says that "For compatibility, the string "--" (double-hyphen)
4889 * must not occur within comments. "
4890 *
4891 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4892 */
4893 void
xmlParseComment(xmlParserCtxtPtr ctxt)4894 xmlParseComment(xmlParserCtxtPtr ctxt) {
4895 xmlChar *buf = NULL;
4896 size_t size = XML_PARSER_BUFFER_SIZE;
4897 size_t len = 0;
4898 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4899 XML_MAX_HUGE_LENGTH :
4900 XML_MAX_TEXT_LENGTH;
4901 xmlParserInputState state;
4902 const xmlChar *in;
4903 size_t nbchar = 0;
4904 int ccol;
4905 int inputid;
4906
4907 /*
4908 * Check that there is a comment right here.
4909 */
4910 if ((RAW != '<') || (NXT(1) != '!') ||
4911 (NXT(2) != '-') || (NXT(3) != '-')) return;
4912 state = ctxt->instate;
4913 ctxt->instate = XML_PARSER_COMMENT;
4914 inputid = ctxt->input->id;
4915 SKIP(4);
4916 SHRINK;
4917 GROW;
4918
4919 /*
4920 * Accelerated common case where input don't need to be
4921 * modified before passing it to the handler.
4922 */
4923 in = ctxt->input->cur;
4924 do {
4925 if (*in == 0xA) {
4926 do {
4927 ctxt->input->line++; ctxt->input->col = 1;
4928 in++;
4929 } while (*in == 0xA);
4930 }
4931 get_more:
4932 ccol = ctxt->input->col;
4933 while (((*in > '-') && (*in <= 0x7F)) ||
4934 ((*in >= 0x20) && (*in < '-')) ||
4935 (*in == 0x09)) {
4936 in++;
4937 ccol++;
4938 }
4939 ctxt->input->col = ccol;
4940 if (*in == 0xA) {
4941 do {
4942 ctxt->input->line++; ctxt->input->col = 1;
4943 in++;
4944 } while (*in == 0xA);
4945 goto get_more;
4946 }
4947 nbchar = in - ctxt->input->cur;
4948 /*
4949 * save current set of data
4950 */
4951 if (nbchar > 0) {
4952 if ((ctxt->sax != NULL) &&
4953 (ctxt->sax->comment != NULL)) {
4954 if (buf == NULL) {
4955 if ((*in == '-') && (in[1] == '-'))
4956 size = nbchar + 1;
4957 else
4958 size = XML_PARSER_BUFFER_SIZE + nbchar;
4959 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4960 if (buf == NULL) {
4961 xmlErrMemory(ctxt, NULL);
4962 ctxt->instate = state;
4963 return;
4964 }
4965 len = 0;
4966 } else if (len + nbchar + 1 >= size) {
4967 xmlChar *new_buf;
4968 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4969 new_buf = (xmlChar *) xmlRealloc(buf,
4970 size * sizeof(xmlChar));
4971 if (new_buf == NULL) {
4972 xmlFree (buf);
4973 xmlErrMemory(ctxt, NULL);
4974 ctxt->instate = state;
4975 return;
4976 }
4977 buf = new_buf;
4978 }
4979 memcpy(&buf[len], ctxt->input->cur, nbchar);
4980 len += nbchar;
4981 buf[len] = 0;
4982 }
4983 }
4984 if (len > maxLength) {
4985 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4986 "Comment too big found", NULL);
4987 xmlFree (buf);
4988 return;
4989 }
4990 ctxt->input->cur = in;
4991 if (*in == 0xA) {
4992 in++;
4993 ctxt->input->line++; ctxt->input->col = 1;
4994 }
4995 if (*in == 0xD) {
4996 in++;
4997 if (*in == 0xA) {
4998 ctxt->input->cur = in;
4999 in++;
5000 ctxt->input->line++; ctxt->input->col = 1;
5001 goto get_more;
5002 }
5003 in--;
5004 }
5005 SHRINK;
5006 GROW;
5007 if (ctxt->instate == XML_PARSER_EOF) {
5008 xmlFree(buf);
5009 return;
5010 }
5011 in = ctxt->input->cur;
5012 if (*in == '-') {
5013 if (in[1] == '-') {
5014 if (in[2] == '>') {
5015 if (ctxt->input->id != inputid) {
5016 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5017 "comment doesn't start and stop in the"
5018 " same entity\n");
5019 }
5020 SKIP(3);
5021 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5022 (!ctxt->disableSAX)) {
5023 if (buf != NULL)
5024 ctxt->sax->comment(ctxt->userData, buf);
5025 else
5026 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5027 }
5028 if (buf != NULL)
5029 xmlFree(buf);
5030 if (ctxt->instate != XML_PARSER_EOF)
5031 ctxt->instate = state;
5032 return;
5033 }
5034 if (buf != NULL) {
5035 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5036 "Double hyphen within comment: "
5037 "<!--%.50s\n",
5038 buf);
5039 } else
5040 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5041 "Double hyphen within comment\n", NULL);
5042 if (ctxt->instate == XML_PARSER_EOF) {
5043 xmlFree(buf);
5044 return;
5045 }
5046 in++;
5047 ctxt->input->col++;
5048 }
5049 in++;
5050 ctxt->input->col++;
5051 goto get_more;
5052 }
5053 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5054 xmlParseCommentComplex(ctxt, buf, len, size);
5055 ctxt->instate = state;
5056 return;
5057 }
5058
5059
5060 /**
5061 * xmlParsePITarget:
5062 * @ctxt: an XML parser context
5063 *
5064 * parse the name of a PI
5065 *
5066 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5067 *
5068 * Returns the PITarget name or NULL
5069 */
5070
5071 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5072 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5073 const xmlChar *name;
5074
5075 name = xmlParseName(ctxt);
5076 if ((name != NULL) &&
5077 ((name[0] == 'x') || (name[0] == 'X')) &&
5078 ((name[1] == 'm') || (name[1] == 'M')) &&
5079 ((name[2] == 'l') || (name[2] == 'L'))) {
5080 int i;
5081 if ((name[0] == 'x') && (name[1] == 'm') &&
5082 (name[2] == 'l') && (name[3] == 0)) {
5083 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5084 "XML declaration allowed only at the start of the document\n");
5085 return(name);
5086 } else if (name[3] == 0) {
5087 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5088 return(name);
5089 }
5090 for (i = 0;;i++) {
5091 if (xmlW3CPIs[i] == NULL) break;
5092 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5093 return(name);
5094 }
5095 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096 "xmlParsePITarget: invalid name prefix 'xml'\n",
5097 NULL, NULL);
5098 }
5099 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5100 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5101 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5102 }
5103 return(name);
5104 }
5105
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each part, nothing after the closing
 * quote). A value that has no '=' after the "catalog" keyword is ignored
 * silently; any other deviation produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;    /* scanning position */
    const xmlChar *start;          /* first character of the quoted URL */
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Look for the matching closing quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5167
5168 /**
5169 * xmlParsePI:
5170 * @ctxt: an XML parser context
5171 *
5172 * parse an XML Processing Instruction.
5173 *
5174 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5175 *
5176 * The processing is transferred to SAX once parsed.
5177 */
5178
5179 void
xmlParsePI(xmlParserCtxtPtr ctxt)5180 xmlParsePI(xmlParserCtxtPtr ctxt) {
5181 xmlChar *buf = NULL;
5182 size_t len = 0;
5183 size_t size = XML_PARSER_BUFFER_SIZE;
5184 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5185 XML_MAX_HUGE_LENGTH :
5186 XML_MAX_TEXT_LENGTH;
5187 int cur, l;
5188 const xmlChar *target;
5189 xmlParserInputState state;
5190 int count = 0;
5191
5192 if ((RAW == '<') && (NXT(1) == '?')) {
5193 int inputid = ctxt->input->id;
5194 state = ctxt->instate;
5195 ctxt->instate = XML_PARSER_PI;
5196 /*
5197 * this is a Processing Instruction.
5198 */
5199 SKIP(2);
5200 SHRINK;
5201
5202 /*
5203 * Parse the target name and check for special support like
5204 * namespace.
5205 */
5206 target = xmlParsePITarget(ctxt);
5207 if (target != NULL) {
5208 if ((RAW == '?') && (NXT(1) == '>')) {
5209 if (inputid != ctxt->input->id) {
5210 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5211 "PI declaration doesn't start and stop in"
5212 " the same entity\n");
5213 }
5214 SKIP(2);
5215
5216 /*
5217 * SAX: PI detected.
5218 */
5219 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5220 (ctxt->sax->processingInstruction != NULL))
5221 ctxt->sax->processingInstruction(ctxt->userData,
5222 target, NULL);
5223 if (ctxt->instate != XML_PARSER_EOF)
5224 ctxt->instate = state;
5225 return;
5226 }
5227 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5228 if (buf == NULL) {
5229 xmlErrMemory(ctxt, NULL);
5230 ctxt->instate = state;
5231 return;
5232 }
5233 if (SKIP_BLANKS == 0) {
5234 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5235 "ParsePI: PI %s space expected\n", target);
5236 }
5237 cur = CUR_CHAR(l);
5238 while (IS_CHAR(cur) && /* checked */
5239 ((cur != '?') || (NXT(1) != '>'))) {
5240 if (len + 5 >= size) {
5241 xmlChar *tmp;
5242 size_t new_size = size * 2;
5243 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5244 if (tmp == NULL) {
5245 xmlErrMemory(ctxt, NULL);
5246 xmlFree(buf);
5247 ctxt->instate = state;
5248 return;
5249 }
5250 buf = tmp;
5251 size = new_size;
5252 }
5253 count++;
5254 if (count > 50) {
5255 SHRINK;
5256 GROW;
5257 if (ctxt->instate == XML_PARSER_EOF) {
5258 xmlFree(buf);
5259 return;
5260 }
5261 count = 0;
5262 }
5263 COPY_BUF(l,buf,len,cur);
5264 NEXTL(l);
5265 cur = CUR_CHAR(l);
5266 if (cur == 0) {
5267 SHRINK;
5268 GROW;
5269 cur = CUR_CHAR(l);
5270 }
5271 if (len > maxLength) {
5272 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5273 "PI %s too big found", target);
5274 xmlFree(buf);
5275 ctxt->instate = state;
5276 return;
5277 }
5278 }
5279 buf[len] = 0;
5280 if (cur != '?') {
5281 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5282 "ParsePI: PI %s never end ...\n", target);
5283 } else {
5284 if (inputid != ctxt->input->id) {
5285 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5286 "PI declaration doesn't start and stop in"
5287 " the same entity\n");
5288 }
5289 SKIP(2);
5290
5291 #ifdef LIBXML_CATALOG_ENABLED
5292 if (((state == XML_PARSER_MISC) ||
5293 (state == XML_PARSER_START)) &&
5294 (xmlStrEqual(target, XML_CATALOG_PI))) {
5295 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5296 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5297 (allow == XML_CATA_ALLOW_ALL))
5298 xmlParseCatalogPI(ctxt, buf);
5299 }
5300 #endif
5301
5302
5303 /*
5304 * SAX: PI detected.
5305 */
5306 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307 (ctxt->sax->processingInstruction != NULL))
5308 ctxt->sax->processingInstruction(ctxt->userData,
5309 target, buf);
5310 }
5311 xmlFree(buf);
5312 } else {
5313 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5314 }
5315 if (ctxt->instate != XML_PARSER_EOF)
5316 ctxt->instate = state;
5317 }
5318 }
5319
5320 /**
5321 * xmlParseNotationDecl:
5322 * @ctxt: an XML parser context
5323 *
5324 * parse a notation declaration
5325 *
5326 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5327 *
5328 * Hence there is actually 3 choices:
5329 * 'PUBLIC' S PubidLiteral
5330 * 'PUBLIC' S PubidLiteral S SystemLiteral
5331 * and 'SYSTEM' S SystemLiteral
5332 *
5333 * See the NOTE on xmlParseExternalID().
5334 */
5335
5336 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5337 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5338 const xmlChar *name;
5339 xmlChar *Pubid;
5340 xmlChar *Systemid;
5341
5342 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5343 int inputid = ctxt->input->id;
5344 SHRINK;
5345 SKIP(10);
5346 if (SKIP_BLANKS == 0) {
5347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348 "Space required after '<!NOTATION'\n");
5349 return;
5350 }
5351
5352 name = xmlParseName(ctxt);
5353 if (name == NULL) {
5354 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5355 return;
5356 }
5357 if (xmlStrchr(name, ':') != NULL) {
5358 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5359 "colons are forbidden from notation names '%s'\n",
5360 name, NULL, NULL);
5361 }
5362 if (SKIP_BLANKS == 0) {
5363 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5364 "Space required after the NOTATION name'\n");
5365 return;
5366 }
5367
5368 /*
5369 * Parse the IDs.
5370 */
5371 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5372 SKIP_BLANKS;
5373
5374 if (RAW == '>') {
5375 if (inputid != ctxt->input->id) {
5376 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5377 "Notation declaration doesn't start and stop"
5378 " in the same entity\n");
5379 }
5380 NEXT;
5381 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5382 (ctxt->sax->notationDecl != NULL))
5383 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5384 } else {
5385 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5386 }
5387 if (Systemid != NULL) xmlFree(Systemid);
5388 if (Pubid != NULL) xmlFree(Pubid);
5389 }
5390 }
5391
5392 /**
5393 * xmlParseEntityDecl:
5394 * @ctxt: an XML parser context
5395 *
5396 * parse <!ENTITY declarations
5397 *
5398 * [70] EntityDecl ::= GEDecl | PEDecl
5399 *
5400 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5401 *
5402 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5403 *
5404 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5405 *
5406 * [74] PEDef ::= EntityValue | ExternalID
5407 *
5408 * [76] NDataDecl ::= S 'NDATA' S Name
5409 *
5410 * [ VC: Notation Declared ]
5411 * The Name must match the declared name of a notation.
5412 */
5413
5414 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5415 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5416 const xmlChar *name = NULL;
5417 xmlChar *value = NULL;
5418 xmlChar *URI = NULL, *literal = NULL;
5419 const xmlChar *ndata = NULL;
5420 int isParameter = 0;
5421 xmlChar *orig = NULL;
5422
5423 /* GROW; done in the caller */
5424 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5425 int inputid = ctxt->input->id;
5426 SHRINK;
5427 SKIP(8);
5428 if (SKIP_BLANKS == 0) {
5429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5430 "Space required after '<!ENTITY'\n");
5431 }
5432
5433 if (RAW == '%') {
5434 NEXT;
5435 if (SKIP_BLANKS == 0) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '%%'\n");
5438 }
5439 isParameter = 1;
5440 }
5441
5442 name = xmlParseName(ctxt);
5443 if (name == NULL) {
5444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5445 "xmlParseEntityDecl: no name\n");
5446 return;
5447 }
5448 if (xmlStrchr(name, ':') != NULL) {
5449 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5450 "colons are forbidden from entities names '%s'\n",
5451 name, NULL, NULL);
5452 }
5453 if (SKIP_BLANKS == 0) {
5454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455 "Space required after the entity name\n");
5456 }
5457
5458 ctxt->instate = XML_PARSER_ENTITY_DECL;
5459 /*
5460 * handle the various case of definitions...
5461 */
5462 if (isParameter) {
5463 if ((RAW == '"') || (RAW == '\'')) {
5464 value = xmlParseEntityValue(ctxt, &orig);
5465 if (value) {
5466 if ((ctxt->sax != NULL) &&
5467 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468 ctxt->sax->entityDecl(ctxt->userData, name,
5469 XML_INTERNAL_PARAMETER_ENTITY,
5470 NULL, NULL, value);
5471 }
5472 } else {
5473 URI = xmlParseExternalID(ctxt, &literal, 1);
5474 if ((URI == NULL) && (literal == NULL)) {
5475 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476 }
5477 if (URI) {
5478 xmlURIPtr uri;
5479
5480 uri = xmlParseURI((const char *) URI);
5481 if (uri == NULL) {
5482 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5483 "Invalid URI: %s\n", URI);
5484 /*
5485 * This really ought to be a well formedness error
5486 * but the XML Core WG decided otherwise c.f. issue
5487 * E26 of the XML erratas.
5488 */
5489 } else {
5490 if (uri->fragment != NULL) {
5491 /*
5492 * Okay this is foolish to block those but not
5493 * invalid URIs.
5494 */
5495 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5496 } else {
5497 if ((ctxt->sax != NULL) &&
5498 (!ctxt->disableSAX) &&
5499 (ctxt->sax->entityDecl != NULL))
5500 ctxt->sax->entityDecl(ctxt->userData, name,
5501 XML_EXTERNAL_PARAMETER_ENTITY,
5502 literal, URI, NULL);
5503 }
5504 xmlFreeURI(uri);
5505 }
5506 }
5507 }
5508 } else {
5509 if ((RAW == '"') || (RAW == '\'')) {
5510 value = xmlParseEntityValue(ctxt, &orig);
5511 if ((ctxt->sax != NULL) &&
5512 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5513 ctxt->sax->entityDecl(ctxt->userData, name,
5514 XML_INTERNAL_GENERAL_ENTITY,
5515 NULL, NULL, value);
5516 /*
5517 * For expat compatibility in SAX mode.
5518 */
5519 if ((ctxt->myDoc == NULL) ||
5520 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5521 if (ctxt->myDoc == NULL) {
5522 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5523 if (ctxt->myDoc == NULL) {
5524 xmlErrMemory(ctxt, "New Doc failed");
5525 return;
5526 }
5527 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5528 }
5529 if (ctxt->myDoc->intSubset == NULL)
5530 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5531 BAD_CAST "fake", NULL, NULL);
5532
5533 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5534 NULL, NULL, value);
5535 }
5536 } else {
5537 URI = xmlParseExternalID(ctxt, &literal, 1);
5538 if ((URI == NULL) && (literal == NULL)) {
5539 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5540 }
5541 if (URI) {
5542 xmlURIPtr uri;
5543
5544 uri = xmlParseURI((const char *)URI);
5545 if (uri == NULL) {
5546 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5547 "Invalid URI: %s\n", URI);
5548 /*
5549 * This really ought to be a well formedness error
5550 * but the XML Core WG decided otherwise c.f. issue
5551 * E26 of the XML erratas.
5552 */
5553 } else {
5554 if (uri->fragment != NULL) {
5555 /*
5556 * Okay this is foolish to block those but not
5557 * invalid URIs.
5558 */
5559 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5560 }
5561 xmlFreeURI(uri);
5562 }
5563 }
5564 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 "Space required before 'NDATA'\n");
5567 }
5568 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5569 SKIP(5);
5570 if (SKIP_BLANKS == 0) {
5571 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5572 "Space required after 'NDATA'\n");
5573 }
5574 ndata = xmlParseName(ctxt);
5575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5576 (ctxt->sax->unparsedEntityDecl != NULL))
5577 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5578 literal, URI, ndata);
5579 } else {
5580 if ((ctxt->sax != NULL) &&
5581 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5582 ctxt->sax->entityDecl(ctxt->userData, name,
5583 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584 literal, URI, NULL);
5585 /*
5586 * For expat compatibility in SAX mode.
5587 * assuming the entity replacement was asked for
5588 */
5589 if ((ctxt->replaceEntities != 0) &&
5590 ((ctxt->myDoc == NULL) ||
5591 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5592 if (ctxt->myDoc == NULL) {
5593 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5594 if (ctxt->myDoc == NULL) {
5595 xmlErrMemory(ctxt, "New Doc failed");
5596 return;
5597 }
5598 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5599 }
5600
5601 if (ctxt->myDoc->intSubset == NULL)
5602 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5603 BAD_CAST "fake", NULL, NULL);
5604 xmlSAX2EntityDecl(ctxt, name,
5605 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5606 literal, URI, NULL);
5607 }
5608 }
5609 }
5610 }
5611 if (ctxt->instate == XML_PARSER_EOF)
5612 goto done;
5613 SKIP_BLANKS;
5614 if (RAW != '>') {
5615 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5616 "xmlParseEntityDecl: entity %s not terminated\n", name);
5617 xmlHaltParser(ctxt);
5618 } else {
5619 if (inputid != ctxt->input->id) {
5620 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5621 "Entity declaration doesn't start and stop in"
5622 " the same entity\n");
5623 }
5624 NEXT;
5625 }
5626 if (orig != NULL) {
5627 /*
5628 * Ugly mechanism to save the raw entity value.
5629 */
5630 xmlEntityPtr cur = NULL;
5631
5632 if (isParameter) {
5633 if ((ctxt->sax != NULL) &&
5634 (ctxt->sax->getParameterEntity != NULL))
5635 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5636 } else {
5637 if ((ctxt->sax != NULL) &&
5638 (ctxt->sax->getEntity != NULL))
5639 cur = ctxt->sax->getEntity(ctxt->userData, name);
5640 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5641 cur = xmlSAX2GetEntity(ctxt, name);
5642 }
5643 }
5644 if ((cur != NULL) && (cur->orig == NULL)) {
5645 cur->orig = orig;
5646 orig = NULL;
5647 }
5648 }
5649
5650 done:
5651 if (value != NULL) xmlFree(value);
5652 if (URI != NULL) xmlFree(URI);
5653 if (literal != NULL) xmlFree(literal);
5654 if (orig != NULL) xmlFree(orig);
5655 }
5656 }
5657
5658 /**
5659 * xmlParseDefaultDecl:
5660 * @ctxt: an XML parser context
5661 * @value: Receive a possible fixed default value for the attribute
5662 *
5663 * Parse an attribute default declaration
5664 *
5665 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5666 *
5667 * [ VC: Required Attribute ]
5668 * if the default declaration is the keyword #REQUIRED, then the
5669 * attribute must be specified for all elements of the type in the
5670 * attribute-list declaration.
5671 *
5672 * [ VC: Attribute Default Legal ]
5673 * The declared default value must meet the lexical constraints of
5674 * the declared attribute type c.f. xmlValidateAttributeDecl()
5675 *
5676 * [ VC: Fixed Attribute Default ]
5677 * if an attribute has a default value declared with the #FIXED
5678 * keyword, instances of that attribute must match the default value.
5679 *
5680 * [ WFC: No < in Attribute Values ]
5681 * handled in xmlParseAttValue()
5682 *
5683 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5684 * or XML_ATTRIBUTE_FIXED.
5685 */
5686
5687 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5688 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5689 int val;
5690 xmlChar *ret;
5691
5692 *value = NULL;
5693 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5694 SKIP(9);
5695 return(XML_ATTRIBUTE_REQUIRED);
5696 }
5697 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5698 SKIP(8);
5699 return(XML_ATTRIBUTE_IMPLIED);
5700 }
5701 val = XML_ATTRIBUTE_NONE;
5702 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5703 SKIP(6);
5704 val = XML_ATTRIBUTE_FIXED;
5705 if (SKIP_BLANKS == 0) {
5706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707 "Space required after '#FIXED'\n");
5708 }
5709 }
5710 ret = xmlParseAttValue(ctxt);
5711 ctxt->instate = XML_PARSER_DTD;
5712 if (ret == NULL) {
5713 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5714 "Attribute default value declaration error\n");
5715 } else
5716 *value = ret;
5717 return(val);
5718 }
5719
5720 /**
5721 * xmlParseNotationType:
5722 * @ctxt: an XML parser context
5723 *
5724 * parse an Notation attribute type.
5725 *
5726 * Note: the leading 'NOTATION' S part has already being parsed...
5727 *
5728 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5729 *
5730 * [ VC: Notation Attributes ]
5731 * Values of this type must match one of the notation names included
5732 * in the declaration; all notation names in the declaration must be declared.
5733 *
5734 * Returns: the notation attribute tree built while parsing
5735 */
5736
5737 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5738 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5739 const xmlChar *name;
5740 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5741
5742 if (RAW != '(') {
5743 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5744 return(NULL);
5745 }
5746 SHRINK;
5747 do {
5748 NEXT;
5749 SKIP_BLANKS;
5750 name = xmlParseName(ctxt);
5751 if (name == NULL) {
5752 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5753 "Name expected in NOTATION declaration\n");
5754 xmlFreeEnumeration(ret);
5755 return(NULL);
5756 }
5757 tmp = ret;
5758 while (tmp != NULL) {
5759 if (xmlStrEqual(name, tmp->name)) {
5760 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5761 "standalone: attribute notation value token %s duplicated\n",
5762 name, NULL);
5763 if (!xmlDictOwns(ctxt->dict, name))
5764 xmlFree((xmlChar *) name);
5765 break;
5766 }
5767 tmp = tmp->next;
5768 }
5769 if (tmp == NULL) {
5770 cur = xmlCreateEnumeration(name);
5771 if (cur == NULL) {
5772 xmlFreeEnumeration(ret);
5773 return(NULL);
5774 }
5775 if (last == NULL) ret = last = cur;
5776 else {
5777 last->next = cur;
5778 last = cur;
5779 }
5780 }
5781 SKIP_BLANKS;
5782 } while (RAW == '|');
5783 if (RAW != ')') {
5784 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5785 xmlFreeEnumeration(ret);
5786 return(NULL);
5787 }
5788 NEXT;
5789 return(ret);
5790 }
5791
5792 /**
5793 * xmlParseEnumerationType:
5794 * @ctxt: an XML parser context
5795 *
5796 * parse an Enumeration attribute type.
5797 *
5798 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5799 *
5800 * [ VC: Enumeration ]
5801 * Values of this type must match one of the Nmtoken tokens in
5802 * the declaration
5803 *
5804 * Returns: the enumeration attribute tree built while parsing
5805 */
5806
5807 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5808 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5809 xmlChar *name;
5810 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5811
5812 if (RAW != '(') {
5813 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5814 return(NULL);
5815 }
5816 SHRINK;
5817 do {
5818 NEXT;
5819 SKIP_BLANKS;
5820 name = xmlParseNmtoken(ctxt);
5821 if (name == NULL) {
5822 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5823 return(ret);
5824 }
5825 tmp = ret;
5826 while (tmp != NULL) {
5827 if (xmlStrEqual(name, tmp->name)) {
5828 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5829 "standalone: attribute enumeration value token %s duplicated\n",
5830 name, NULL);
5831 if (!xmlDictOwns(ctxt->dict, name))
5832 xmlFree(name);
5833 break;
5834 }
5835 tmp = tmp->next;
5836 }
5837 if (tmp == NULL) {
5838 cur = xmlCreateEnumeration(name);
5839 if (!xmlDictOwns(ctxt->dict, name))
5840 xmlFree(name);
5841 if (cur == NULL) {
5842 xmlFreeEnumeration(ret);
5843 return(NULL);
5844 }
5845 if (last == NULL) ret = last = cur;
5846 else {
5847 last->next = cur;
5848 last = cur;
5849 }
5850 }
5851 SKIP_BLANKS;
5852 } while (RAW == '|');
5853 if (RAW != ')') {
5854 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5855 return(ret);
5856 }
5857 NEXT;
5858 return(ret);
5859 }
5860
5861 /**
5862 * xmlParseEnumeratedType:
5863 * @ctxt: an XML parser context
5864 * @tree: the enumeration tree built while parsing
5865 *
5866 * parse an Enumerated attribute type.
5867 *
5868 * [57] EnumeratedType ::= NotationType | Enumeration
5869 *
5870 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5871 *
5872 *
5873 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5874 */
5875
5876 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5877 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5878 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5879 SKIP(8);
5880 if (SKIP_BLANKS == 0) {
5881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5882 "Space required after 'NOTATION'\n");
5883 return(0);
5884 }
5885 *tree = xmlParseNotationType(ctxt);
5886 if (*tree == NULL) return(0);
5887 return(XML_ATTRIBUTE_NOTATION);
5888 }
5889 *tree = xmlParseEnumerationType(ctxt);
5890 if (*tree == NULL) return(0);
5891 return(XML_ATTRIBUTE_ENUMERATION);
5892 }
5893
5894 /**
5895 * xmlParseAttributeType:
5896 * @ctxt: an XML parser context
5897 * @tree: the enumeration tree built while parsing
5898 *
5899 * parse the Attribute list def for an element
5900 *
5901 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5902 *
5903 * [55] StringType ::= 'CDATA'
5904 *
5905 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5906 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5907 *
5908 * Validity constraints for attribute values syntax are checked in
5909 * xmlValidateAttributeValue()
5910 *
5911 * [ VC: ID ]
5912 * Values of type ID must match the Name production. A name must not
5913 * appear more than once in an XML document as a value of this type;
5914 * i.e., ID values must uniquely identify the elements which bear them.
5915 *
5916 * [ VC: One ID per Element Type ]
5917 * No element type may have more than one ID attribute specified.
5918 *
5919 * [ VC: ID Attribute Default ]
5920 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5921 *
5922 * [ VC: IDREF ]
5923 * Values of type IDREF must match the Name production, and values
5924 * of type IDREFS must match Names; each IDREF Name must match the value
5925 * of an ID attribute on some element in the XML document; i.e. IDREF
5926 * values must match the value of some ID attribute.
5927 *
5928 * [ VC: Entity Name ]
5929 * Values of type ENTITY must match the Name production, values
5930 * of type ENTITIES must match Names; each Entity Name must match the
5931 * name of an unparsed entity declared in the DTD.
5932 *
5933 * [ VC: Name Token ]
5934 * Values of type NMTOKEN must match the Nmtoken production; values
5935 * of type NMTOKENS must match Nmtokens.
5936 *
5937 * Returns the attribute type
5938 */
5939 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5940 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5941 SHRINK;
5942 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5943 SKIP(5);
5944 return(XML_ATTRIBUTE_CDATA);
5945 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5946 SKIP(6);
5947 return(XML_ATTRIBUTE_IDREFS);
5948 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5949 SKIP(5);
5950 return(XML_ATTRIBUTE_IDREF);
5951 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5952 SKIP(2);
5953 return(XML_ATTRIBUTE_ID);
5954 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5955 SKIP(6);
5956 return(XML_ATTRIBUTE_ENTITY);
5957 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5958 SKIP(8);
5959 return(XML_ATTRIBUTE_ENTITIES);
5960 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5961 SKIP(8);
5962 return(XML_ATTRIBUTE_NMTOKENS);
5963 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5964 SKIP(7);
5965 return(XML_ATTRIBUTE_NMTOKEN);
5966 }
5967 return(xmlParseEnumeratedType(ctxt, tree));
5968 }
5969
5970 /**
5971 * xmlParseAttributeListDecl:
5972 * @ctxt: an XML parser context
5973 *
5974 * : parse the Attribute list def for an element
5975 *
5976 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5977 *
5978 * [53] AttDef ::= S Name S AttType S DefaultDecl
5979 *
5980 */
5981 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5982 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5983 const xmlChar *elemName;
5984 const xmlChar *attrName;
5985 xmlEnumerationPtr tree;
5986
5987 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5988 int inputid = ctxt->input->id;
5989
5990 SKIP(9);
5991 if (SKIP_BLANKS == 0) {
5992 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5993 "Space required after '<!ATTLIST'\n");
5994 }
5995 elemName = xmlParseName(ctxt);
5996 if (elemName == NULL) {
5997 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5998 "ATTLIST: no name for Element\n");
5999 return;
6000 }
6001 SKIP_BLANKS;
6002 GROW;
6003 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6004 int type;
6005 int def;
6006 xmlChar *defaultValue = NULL;
6007
6008 GROW;
6009 tree = NULL;
6010 attrName = xmlParseName(ctxt);
6011 if (attrName == NULL) {
6012 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6013 "ATTLIST: no name for Attribute\n");
6014 break;
6015 }
6016 GROW;
6017 if (SKIP_BLANKS == 0) {
6018 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6019 "Space required after the attribute name\n");
6020 break;
6021 }
6022
6023 type = xmlParseAttributeType(ctxt, &tree);
6024 if (type <= 0) {
6025 break;
6026 }
6027
6028 GROW;
6029 if (SKIP_BLANKS == 0) {
6030 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6031 "Space required after the attribute type\n");
6032 if (tree != NULL)
6033 xmlFreeEnumeration(tree);
6034 break;
6035 }
6036
6037 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6038 if (def <= 0) {
6039 if (defaultValue != NULL)
6040 xmlFree(defaultValue);
6041 if (tree != NULL)
6042 xmlFreeEnumeration(tree);
6043 break;
6044 }
6045 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6046 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6047
6048 GROW;
6049 if (RAW != '>') {
6050 if (SKIP_BLANKS == 0) {
6051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6052 "Space required after the attribute default value\n");
6053 if (defaultValue != NULL)
6054 xmlFree(defaultValue);
6055 if (tree != NULL)
6056 xmlFreeEnumeration(tree);
6057 break;
6058 }
6059 }
6060 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6061 (ctxt->sax->attributeDecl != NULL))
6062 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6063 type, def, defaultValue, tree);
6064 else if (tree != NULL)
6065 xmlFreeEnumeration(tree);
6066
6067 if ((ctxt->sax2) && (defaultValue != NULL) &&
6068 (def != XML_ATTRIBUTE_IMPLIED) &&
6069 (def != XML_ATTRIBUTE_REQUIRED)) {
6070 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6071 }
6072 if (ctxt->sax2) {
6073 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6074 }
6075 if (defaultValue != NULL)
6076 xmlFree(defaultValue);
6077 GROW;
6078 }
6079 if (RAW == '>') {
6080 if (inputid != ctxt->input->id) {
6081 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6082 "Attribute list declaration doesn't start and"
6083 " stop in the same entity\n");
6084 }
6085 NEXT;
6086 }
6087 }
6088 }
6089
6090 /**
6091 * xmlParseElementMixedContentDecl:
6092 * @ctxt: an XML parser context
6093 * @inputchk: the input used for the current entity, needed for boundary checks
6094 *
6095 * parse the declaration for a Mixed Element content
6096 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6097 *
6098 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6099 * '(' S? '#PCDATA' S? ')'
6100 *
6101 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6102 *
6103 * [ VC: No Duplicate Types ]
6104 * The same name must not appear more than once in a single
6105 * mixed-content declaration.
6106 *
6107 * returns: the list of the xmlElementContentPtr describing the element choices
6108 */
6109 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6110 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6111 xmlElementContentPtr ret = NULL, cur = NULL, n;
6112 const xmlChar *elem = NULL;
6113
6114 GROW;
6115 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6116 SKIP(7);
6117 SKIP_BLANKS;
6118 SHRINK;
6119 if (RAW == ')') {
6120 if (ctxt->input->id != inputchk) {
6121 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6122 "Element content declaration doesn't start and"
6123 " stop in the same entity\n");
6124 }
6125 NEXT;
6126 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6127 if (ret == NULL)
6128 return(NULL);
6129 if (RAW == '*') {
6130 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6131 NEXT;
6132 }
6133 return(ret);
6134 }
6135 if ((RAW == '(') || (RAW == '|')) {
6136 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6137 if (ret == NULL) return(NULL);
6138 }
6139 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6140 NEXT;
6141 if (elem == NULL) {
6142 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6143 if (ret == NULL) {
6144 xmlFreeDocElementContent(ctxt->myDoc, cur);
6145 return(NULL);
6146 }
6147 ret->c1 = cur;
6148 if (cur != NULL)
6149 cur->parent = ret;
6150 cur = ret;
6151 } else {
6152 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6153 if (n == NULL) {
6154 xmlFreeDocElementContent(ctxt->myDoc, ret);
6155 return(NULL);
6156 }
6157 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6158 if (n->c1 != NULL)
6159 n->c1->parent = n;
6160 cur->c2 = n;
6161 if (n != NULL)
6162 n->parent = cur;
6163 cur = n;
6164 }
6165 SKIP_BLANKS;
6166 elem = xmlParseName(ctxt);
6167 if (elem == NULL) {
6168 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6169 "xmlParseElementMixedContentDecl : Name expected\n");
6170 xmlFreeDocElementContent(ctxt->myDoc, ret);
6171 return(NULL);
6172 }
6173 SKIP_BLANKS;
6174 GROW;
6175 }
6176 if ((RAW == ')') && (NXT(1) == '*')) {
6177 if (elem != NULL) {
6178 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6179 XML_ELEMENT_CONTENT_ELEMENT);
6180 if (cur->c2 != NULL)
6181 cur->c2->parent = cur;
6182 }
6183 if (ret != NULL)
6184 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6185 if (ctxt->input->id != inputchk) {
6186 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6187 "Element content declaration doesn't start and"
6188 " stop in the same entity\n");
6189 }
6190 SKIP(2);
6191 } else {
6192 xmlFreeDocElementContent(ctxt->myDoc, ret);
6193 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6194 return(NULL);
6195 }
6196
6197 } else {
6198 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6199 }
6200 return(ret);
6201 }
6202
6203 /**
6204 * xmlParseElementChildrenContentDeclPriv:
6205 * @ctxt: an XML parser context
6206 * @inputchk: the input used for the current entity, needed for boundary checks
6207 * @depth: the level of recursion
6208 *
6209 * parse the declaration for a Mixed Element content
6210 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6211 *
6212 *
6213 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6214 *
6215 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6216 *
6217 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6218 *
6219 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6220 *
6221 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6222 * TODO Parameter-entity replacement text must be properly nested
6223 * with parenthesized groups. That is to say, if either of the
6224 * opening or closing parentheses in a choice, seq, or Mixed
6225 * construct is contained in the replacement text for a parameter
6226 * entity, both must be contained in the same replacement text. For
6227 * interoperability, if a parameter-entity reference appears in a
6228 * choice, seq, or Mixed construct, its replacement text should not
6229 * be empty, and neither the first nor last non-blank character of
6230 * the replacement text should be a connector (| or ,).
6231 *
6232 * Returns the tree of xmlElementContentPtr describing the element
6233 * hierarchy.
6234 */
6235 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6236 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6237 int depth) {
6238 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6239 const xmlChar *elem;
6240 xmlChar type = 0;
6241
6242 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6243 (depth > 2048)) {
6244 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6245 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6246 depth);
6247 return(NULL);
6248 }
6249 SKIP_BLANKS;
6250 GROW;
6251 if (RAW == '(') {
6252 int inputid = ctxt->input->id;
6253
6254 /* Recurse on first child */
6255 NEXT;
6256 SKIP_BLANKS;
6257 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6258 depth + 1);
6259 if (cur == NULL)
6260 return(NULL);
6261 SKIP_BLANKS;
6262 GROW;
6263 } else {
6264 elem = xmlParseName(ctxt);
6265 if (elem == NULL) {
6266 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6267 return(NULL);
6268 }
6269 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6270 if (cur == NULL) {
6271 xmlErrMemory(ctxt, NULL);
6272 return(NULL);
6273 }
6274 GROW;
6275 if (RAW == '?') {
6276 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6277 NEXT;
6278 } else if (RAW == '*') {
6279 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6280 NEXT;
6281 } else if (RAW == '+') {
6282 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6283 NEXT;
6284 } else {
6285 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6286 }
6287 GROW;
6288 }
6289 SKIP_BLANKS;
6290 SHRINK;
6291 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6292 /*
6293 * Each loop we parse one separator and one element.
6294 */
6295 if (RAW == ',') {
6296 if (type == 0) type = CUR;
6297
6298 /*
6299 * Detect "Name | Name , Name" error
6300 */
6301 else if (type != CUR) {
6302 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6303 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6304 type);
6305 if ((last != NULL) && (last != ret))
6306 xmlFreeDocElementContent(ctxt->myDoc, last);
6307 if (ret != NULL)
6308 xmlFreeDocElementContent(ctxt->myDoc, ret);
6309 return(NULL);
6310 }
6311 NEXT;
6312
6313 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6314 if (op == NULL) {
6315 if ((last != NULL) && (last != ret))
6316 xmlFreeDocElementContent(ctxt->myDoc, last);
6317 xmlFreeDocElementContent(ctxt->myDoc, ret);
6318 return(NULL);
6319 }
6320 if (last == NULL) {
6321 op->c1 = ret;
6322 if (ret != NULL)
6323 ret->parent = op;
6324 ret = cur = op;
6325 } else {
6326 cur->c2 = op;
6327 if (op != NULL)
6328 op->parent = cur;
6329 op->c1 = last;
6330 if (last != NULL)
6331 last->parent = op;
6332 cur =op;
6333 last = NULL;
6334 }
6335 } else if (RAW == '|') {
6336 if (type == 0) type = CUR;
6337
6338 /*
6339 * Detect "Name , Name | Name" error
6340 */
6341 else if (type != CUR) {
6342 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6343 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6344 type);
6345 if ((last != NULL) && (last != ret))
6346 xmlFreeDocElementContent(ctxt->myDoc, last);
6347 if (ret != NULL)
6348 xmlFreeDocElementContent(ctxt->myDoc, ret);
6349 return(NULL);
6350 }
6351 NEXT;
6352
6353 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6354 if (op == NULL) {
6355 if ((last != NULL) && (last != ret))
6356 xmlFreeDocElementContent(ctxt->myDoc, last);
6357 if (ret != NULL)
6358 xmlFreeDocElementContent(ctxt->myDoc, ret);
6359 return(NULL);
6360 }
6361 if (last == NULL) {
6362 op->c1 = ret;
6363 if (ret != NULL)
6364 ret->parent = op;
6365 ret = cur = op;
6366 } else {
6367 cur->c2 = op;
6368 if (op != NULL)
6369 op->parent = cur;
6370 op->c1 = last;
6371 if (last != NULL)
6372 last->parent = op;
6373 cur =op;
6374 last = NULL;
6375 }
6376 } else {
6377 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6378 if ((last != NULL) && (last != ret))
6379 xmlFreeDocElementContent(ctxt->myDoc, last);
6380 if (ret != NULL)
6381 xmlFreeDocElementContent(ctxt->myDoc, ret);
6382 return(NULL);
6383 }
6384 GROW;
6385 SKIP_BLANKS;
6386 GROW;
6387 if (RAW == '(') {
6388 int inputid = ctxt->input->id;
6389 /* Recurse on second child */
6390 NEXT;
6391 SKIP_BLANKS;
6392 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6393 depth + 1);
6394 if (last == NULL) {
6395 if (ret != NULL)
6396 xmlFreeDocElementContent(ctxt->myDoc, ret);
6397 return(NULL);
6398 }
6399 SKIP_BLANKS;
6400 } else {
6401 elem = xmlParseName(ctxt);
6402 if (elem == NULL) {
6403 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6404 if (ret != NULL)
6405 xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 return(NULL);
6407 }
6408 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6409 if (last == NULL) {
6410 if (ret != NULL)
6411 xmlFreeDocElementContent(ctxt->myDoc, ret);
6412 return(NULL);
6413 }
6414 if (RAW == '?') {
6415 last->ocur = XML_ELEMENT_CONTENT_OPT;
6416 NEXT;
6417 } else if (RAW == '*') {
6418 last->ocur = XML_ELEMENT_CONTENT_MULT;
6419 NEXT;
6420 } else if (RAW == '+') {
6421 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6422 NEXT;
6423 } else {
6424 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6425 }
6426 }
6427 SKIP_BLANKS;
6428 GROW;
6429 }
6430 if ((cur != NULL) && (last != NULL)) {
6431 cur->c2 = last;
6432 if (last != NULL)
6433 last->parent = cur;
6434 }
6435 if (ctxt->input->id != inputchk) {
6436 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6437 "Element content declaration doesn't start and stop in"
6438 " the same entity\n");
6439 }
6440 NEXT;
6441 if (RAW == '?') {
6442 if (ret != NULL) {
6443 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6444 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6445 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6446 else
6447 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6448 }
6449 NEXT;
6450 } else if (RAW == '*') {
6451 if (ret != NULL) {
6452 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453 cur = ret;
6454 /*
6455 * Some normalization:
6456 * (a | b* | c?)* == (a | b | c)*
6457 */
6458 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6459 if ((cur->c1 != NULL) &&
6460 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6461 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6462 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6463 if ((cur->c2 != NULL) &&
6464 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6465 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6466 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6467 cur = cur->c2;
6468 }
6469 }
6470 NEXT;
6471 } else if (RAW == '+') {
6472 if (ret != NULL) {
6473 int found = 0;
6474
6475 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478 else
6479 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6480 /*
6481 * Some normalization:
6482 * (a | b*)+ == (a | b)*
6483 * (a | b?)+ == (a | b)*
6484 */
6485 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6486 if ((cur->c1 != NULL) &&
6487 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 found = 1;
6491 }
6492 if ((cur->c2 != NULL) &&
6493 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6494 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6495 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6496 found = 1;
6497 }
6498 cur = cur->c2;
6499 }
6500 if (found)
6501 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6502 }
6503 NEXT;
6504 }
6505 return(ret);
6506 }
6507
6508 /**
6509 * xmlParseElementChildrenContentDecl:
6510 * @ctxt: an XML parser context
6511 * @inputchk: the input used for the current entity, needed for boundary checks
6512 *
6513 * parse the declaration for a Mixed Element content
6514 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6515 *
6516 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6517 *
6518 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6519 *
6520 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6521 *
6522 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6523 *
6524 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6525 * TODO Parameter-entity replacement text must be properly nested
6526 * with parenthesized groups. That is to say, if either of the
6527 * opening or closing parentheses in a choice, seq, or Mixed
6528 * construct is contained in the replacement text for a parameter
6529 * entity, both must be contained in the same replacement text. For
6530 * interoperability, if a parameter-entity reference appears in a
6531 * choice, seq, or Mixed construct, its replacement text should not
6532 * be empty, and neither the first nor last non-blank character of
6533 * the replacement text should be a connector (| or ,).
6534 *
6535 * Returns the tree of xmlElementContentPtr describing the element
6536 * hierarchy.
6537 */
6538 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6539 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6540 /* stub left for API/ABI compat */
6541 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6542 }
6543
6544 /**
6545 * xmlParseElementContentDecl:
6546 * @ctxt: an XML parser context
6547 * @name: the name of the element being defined.
6548 * @result: the Element Content pointer will be stored here if any
6549 *
6550 * parse the declaration for an Element content either Mixed or Children,
6551 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6552 *
6553 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6554 *
6555 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6556 */
6557
6558 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6559 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6560 xmlElementContentPtr *result) {
6561
6562 xmlElementContentPtr tree = NULL;
6563 int inputid = ctxt->input->id;
6564 int res;
6565
6566 *result = NULL;
6567
6568 if (RAW != '(') {
6569 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6570 "xmlParseElementContentDecl : %s '(' expected\n", name);
6571 return(-1);
6572 }
6573 NEXT;
6574 GROW;
6575 if (ctxt->instate == XML_PARSER_EOF)
6576 return(-1);
6577 SKIP_BLANKS;
6578 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6579 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6580 res = XML_ELEMENT_TYPE_MIXED;
6581 } else {
6582 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6583 res = XML_ELEMENT_TYPE_ELEMENT;
6584 }
6585 SKIP_BLANKS;
6586 *result = tree;
6587 return(res);
6588 }
6589
6590 /**
6591 * xmlParseElementDecl:
6592 * @ctxt: an XML parser context
6593 *
6594 * parse an Element declaration.
6595 *
6596 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6597 *
6598 * [ VC: Unique Element Type Declaration ]
6599 * No element type may be declared more than once
6600 *
6601 * Returns the type of the element, or -1 in case of error
6602 */
6603 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6604 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6605 const xmlChar *name;
6606 int ret = -1;
6607 xmlElementContentPtr content = NULL;
6608
6609 /* GROW; done in the caller */
6610 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6611 int inputid = ctxt->input->id;
6612
6613 SKIP(9);
6614 if (SKIP_BLANKS == 0) {
6615 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6616 "Space required after 'ELEMENT'\n");
6617 return(-1);
6618 }
6619 name = xmlParseName(ctxt);
6620 if (name == NULL) {
6621 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6622 "xmlParseElementDecl: no name for Element\n");
6623 return(-1);
6624 }
6625 if (SKIP_BLANKS == 0) {
6626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6627 "Space required after the element name\n");
6628 }
6629 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6630 SKIP(5);
6631 /*
6632 * Element must always be empty.
6633 */
6634 ret = XML_ELEMENT_TYPE_EMPTY;
6635 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6636 (NXT(2) == 'Y')) {
6637 SKIP(3);
6638 /*
6639 * Element is a generic container.
6640 */
6641 ret = XML_ELEMENT_TYPE_ANY;
6642 } else if (RAW == '(') {
6643 ret = xmlParseElementContentDecl(ctxt, name, &content);
6644 } else {
6645 /*
6646 * [ WFC: PEs in Internal Subset ] error handling.
6647 */
6648 if ((RAW == '%') && (ctxt->external == 0) &&
6649 (ctxt->inputNr == 1)) {
6650 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6651 "PEReference: forbidden within markup decl in internal subset\n");
6652 } else {
6653 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6654 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6655 }
6656 return(-1);
6657 }
6658
6659 SKIP_BLANKS;
6660
6661 if (RAW != '>') {
6662 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6663 if (content != NULL) {
6664 xmlFreeDocElementContent(ctxt->myDoc, content);
6665 }
6666 } else {
6667 if (inputid != ctxt->input->id) {
6668 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6669 "Element declaration doesn't start and stop in"
6670 " the same entity\n");
6671 }
6672
6673 NEXT;
6674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6675 (ctxt->sax->elementDecl != NULL)) {
6676 if (content != NULL)
6677 content->parent = NULL;
6678 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6679 content);
6680 if ((content != NULL) && (content->parent == NULL)) {
6681 /*
6682 * this is a trick: if xmlAddElementDecl is called,
6683 * instead of copying the full tree it is plugged directly
6684 * if called from the parser. Avoid duplicating the
6685 * interfaces or change the API/ABI
6686 */
6687 xmlFreeDocElementContent(ctxt->myDoc, content);
6688 }
6689 } else if (content != NULL) {
6690 xmlFreeDocElementContent(ctxt->myDoc, content);
6691 }
6692 }
6693 }
6694 return(ret);
6695 }
6696
6697 /**
6698 * xmlParseConditionalSections
6699 * @ctxt: an XML parser context
6700 *
6701 * [61] conditionalSect ::= includeSect | ignoreSect
6702 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6703 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6704 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6705 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6706 */
6707
6708 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6709 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6710 int *inputIds = NULL;
6711 size_t inputIdsSize = 0;
6712 size_t depth = 0;
6713
6714 while (ctxt->instate != XML_PARSER_EOF) {
6715 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6716 int id = ctxt->input->id;
6717
6718 SKIP(3);
6719 SKIP_BLANKS;
6720
6721 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6722 SKIP(7);
6723 SKIP_BLANKS;
6724 if (RAW != '[') {
6725 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6726 xmlHaltParser(ctxt);
6727 goto error;
6728 }
6729 if (ctxt->input->id != id) {
6730 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6731 "All markup of the conditional section is"
6732 " not in the same entity\n");
6733 }
6734 NEXT;
6735
6736 if (inputIdsSize <= depth) {
6737 int *tmp;
6738
6739 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6740 tmp = (int *) xmlRealloc(inputIds,
6741 inputIdsSize * sizeof(int));
6742 if (tmp == NULL) {
6743 xmlErrMemory(ctxt, NULL);
6744 goto error;
6745 }
6746 inputIds = tmp;
6747 }
6748 inputIds[depth] = id;
6749 depth++;
6750 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6751 int state;
6752 xmlParserInputState instate;
6753 size_t ignoreDepth = 0;
6754
6755 SKIP(6);
6756 SKIP_BLANKS;
6757 if (RAW != '[') {
6758 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6759 xmlHaltParser(ctxt);
6760 goto error;
6761 }
6762 if (ctxt->input->id != id) {
6763 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6764 "All markup of the conditional section is"
6765 " not in the same entity\n");
6766 }
6767 NEXT;
6768
6769 /*
6770 * Parse up to the end of the conditional section but disable
6771 * SAX event generating DTD building in the meantime
6772 */
6773 state = ctxt->disableSAX;
6774 instate = ctxt->instate;
6775 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6776 ctxt->instate = XML_PARSER_IGNORE;
6777
6778 while (RAW != 0) {
6779 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6780 SKIP(3);
6781 ignoreDepth++;
6782 /* Check for integer overflow */
6783 if (ignoreDepth == 0) {
6784 xmlErrMemory(ctxt, NULL);
6785 goto error;
6786 }
6787 } else if ((RAW == ']') && (NXT(1) == ']') &&
6788 (NXT(2) == '>')) {
6789 if (ignoreDepth == 0)
6790 break;
6791 SKIP(3);
6792 ignoreDepth--;
6793 } else {
6794 NEXT;
6795 }
6796 }
6797
6798 ctxt->disableSAX = state;
6799 ctxt->instate = instate;
6800
6801 if (RAW == 0) {
6802 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6803 goto error;
6804 }
6805 if (ctxt->input->id != id) {
6806 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807 "All markup of the conditional section is"
6808 " not in the same entity\n");
6809 }
6810 SKIP(3);
6811 } else {
6812 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6813 xmlHaltParser(ctxt);
6814 goto error;
6815 }
6816 } else if ((depth > 0) &&
6817 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6818 depth--;
6819 if (ctxt->input->id != inputIds[depth]) {
6820 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6821 "All markup of the conditional section is not"
6822 " in the same entity\n");
6823 }
6824 SKIP(3);
6825 } else {
6826 const xmlChar *check = CUR_PTR;
6827 unsigned int cons = ctxt->input->consumed;
6828
6829 xmlParseMarkupDecl(ctxt);
6830
6831 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6832 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6833 xmlHaltParser(ctxt);
6834 goto error;
6835 }
6836 }
6837
6838 if (depth == 0)
6839 break;
6840
6841 SKIP_BLANKS;
6842 GROW;
6843 }
6844
6845 error:
6846 xmlFree(inputIds);
6847 }
6848
6849 /**
6850 * xmlParseMarkupDecl:
6851 * @ctxt: an XML parser context
6852 *
6853 * parse Markup declarations
6854 *
6855 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856 * NotationDecl | PI | Comment
6857 *
6858 * [ VC: Proper Declaration/PE Nesting ]
6859 * Parameter-entity replacement text must be properly nested with
6860 * markup declarations. That is to say, if either the first character
6861 * or the last character of a markup declaration (markupdecl above) is
6862 * contained in the replacement text for a parameter-entity reference,
6863 * both must be contained in the same replacement text.
6864 *
6865 * [ WFC: PEs in Internal Subset ]
6866 * In the internal DTD subset, parameter-entity references can occur
6867 * only where markup declarations can occur, not within markup declarations.
6868 * (This does not apply to references that occur in external parameter
6869 * entities or to the external subset.)
6870 */
6871 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6872 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6873 GROW;
6874 if (CUR == '<') {
6875 if (NXT(1) == '!') {
6876 switch (NXT(2)) {
6877 case 'E':
6878 if (NXT(3) == 'L')
6879 xmlParseElementDecl(ctxt);
6880 else if (NXT(3) == 'N')
6881 xmlParseEntityDecl(ctxt);
6882 break;
6883 case 'A':
6884 xmlParseAttributeListDecl(ctxt);
6885 break;
6886 case 'N':
6887 xmlParseNotationDecl(ctxt);
6888 break;
6889 case '-':
6890 xmlParseComment(ctxt);
6891 break;
6892 default:
6893 /* there is an error but it will be detected later */
6894 break;
6895 }
6896 } else if (NXT(1) == '?') {
6897 xmlParsePI(ctxt);
6898 }
6899 }
6900
6901 /*
6902 * detect requirement to exit there and act accordingly
6903 * and avoid having instate overridden later on
6904 */
6905 if (ctxt->instate == XML_PARSER_EOF)
6906 return;
6907
6908 ctxt->instate = XML_PARSER_DTD;
6909 }
6910
6911 /**
6912 * xmlParseTextDecl:
6913 * @ctxt: an XML parser context
6914 *
6915 * parse an XML declaration header for external entities
6916 *
6917 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6918 */
6919
6920 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6921 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6922 xmlChar *version;
6923 const xmlChar *encoding;
6924 int oldstate;
6925
6926 /*
6927 * We know that '<?xml' is here.
6928 */
6929 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6930 SKIP(5);
6931 } else {
6932 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6933 return;
6934 }
6935
6936 /* Avoid expansion of parameter entities when skipping blanks. */
6937 oldstate = ctxt->instate;
6938 ctxt->instate = XML_PARSER_START;
6939
6940 if (SKIP_BLANKS == 0) {
6941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 "Space needed after '<?xml'\n");
6943 }
6944
6945 /*
6946 * We may have the VersionInfo here.
6947 */
6948 version = xmlParseVersionInfo(ctxt);
6949 if (version == NULL)
6950 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6951 else {
6952 if (SKIP_BLANKS == 0) {
6953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6954 "Space needed here\n");
6955 }
6956 }
6957 ctxt->input->version = version;
6958
6959 /*
6960 * We must have the encoding declaration
6961 */
6962 encoding = xmlParseEncodingDecl(ctxt);
6963 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6964 /*
6965 * The XML REC instructs us to stop parsing right here
6966 */
6967 ctxt->instate = oldstate;
6968 return;
6969 }
6970 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6971 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6972 "Missing encoding in text declaration\n");
6973 }
6974
6975 SKIP_BLANKS;
6976 if ((RAW == '?') && (NXT(1) == '>')) {
6977 SKIP(2);
6978 } else if (RAW == '>') {
6979 /* Deprecated old WD ... */
6980 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6981 NEXT;
6982 } else {
6983 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6984 MOVETO_ENDTAG(CUR_PTR);
6985 NEXT;
6986 }
6987
6988 ctxt->instate = oldstate;
6989 }
6990
6991 /**
6992 * xmlParseExternalSubset:
6993 * @ctxt: an XML parser context
6994 * @ExternalID: the external identifier
6995 * @SystemID: the system identifier (or URL)
6996 *
6997 * parse Markup declarations from an external subset
6998 *
6999 * [30] extSubset ::= textDecl? extSubsetDecl
7000 *
7001 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7002 */
7003 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7004 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7005 const xmlChar *SystemID) {
7006 xmlDetectSAX2(ctxt);
7007 GROW;
7008
7009 if ((ctxt->encoding == NULL) &&
7010 (ctxt->input->end - ctxt->input->cur >= 4)) {
7011 xmlChar start[4];
7012 xmlCharEncoding enc;
7013
7014 start[0] = RAW;
7015 start[1] = NXT(1);
7016 start[2] = NXT(2);
7017 start[3] = NXT(3);
7018 enc = xmlDetectCharEncoding(start, 4);
7019 if (enc != XML_CHAR_ENCODING_NONE)
7020 xmlSwitchEncoding(ctxt, enc);
7021 }
7022
7023 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7024 xmlParseTextDecl(ctxt);
7025 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7026 /*
7027 * The XML REC instructs us to stop parsing right here
7028 */
7029 xmlHaltParser(ctxt);
7030 return;
7031 }
7032 }
7033 if (ctxt->myDoc == NULL) {
7034 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7035 if (ctxt->myDoc == NULL) {
7036 xmlErrMemory(ctxt, "New Doc failed");
7037 return;
7038 }
7039 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7040 }
7041 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7042 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7043
7044 ctxt->instate = XML_PARSER_DTD;
7045 ctxt->external = 1;
7046 SKIP_BLANKS;
7047 while (((RAW == '<') && (NXT(1) == '?')) ||
7048 ((RAW == '<') && (NXT(1) == '!')) ||
7049 (RAW == '%')) {
7050 const xmlChar *check = CUR_PTR;
7051 unsigned int cons = ctxt->input->consumed;
7052
7053 GROW;
7054 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7055 xmlParseConditionalSections(ctxt);
7056 } else
7057 xmlParseMarkupDecl(ctxt);
7058 SKIP_BLANKS;
7059
7060 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7062 break;
7063 }
7064 }
7065
7066 if (RAW != 0) {
7067 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7068 }
7069
7070 }
7071
7072 /**
7073 * xmlParseReference:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse and handle entity references in content, depending on the SAX
7077 * interface, this may end-up in a call to character() if this is a
7078 * CharRef, a predefined entity, if there is no reference() callback.
7079 * or if the parser was asked to switch to that mode.
7080 *
7081 * [67] Reference ::= EntityRef | CharRef
7082 */
7083 void
xmlParseReference(xmlParserCtxtPtr ctxt)7084 xmlParseReference(xmlParserCtxtPtr ctxt) {
7085 xmlEntityPtr ent;
7086 xmlChar *val;
7087 int was_checked;
7088 xmlNodePtr list = NULL;
7089 xmlParserErrors ret = XML_ERR_OK;
7090
7091
7092 if (RAW != '&')
7093 return;
7094
7095 /*
7096 * Simple case of a CharRef
7097 */
7098 if (NXT(1) == '#') {
7099 int i = 0;
7100 xmlChar out[16];
7101 int hex = NXT(2);
7102 int value = xmlParseCharRef(ctxt);
7103
7104 if (value == 0)
7105 return;
7106 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7107 /*
7108 * So we are using non-UTF-8 buffers
7109 * Check that the char fit on 8bits, if not
7110 * generate a CharRef.
7111 */
7112 if (value <= 0xFF) {
7113 out[0] = value;
7114 out[1] = 0;
7115 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7116 (!ctxt->disableSAX))
7117 ctxt->sax->characters(ctxt->userData, out, 1);
7118 } else {
7119 if ((hex == 'x') || (hex == 'X'))
7120 snprintf((char *)out, sizeof(out), "#x%X", value);
7121 else
7122 snprintf((char *)out, sizeof(out), "#%d", value);
7123 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7124 (!ctxt->disableSAX))
7125 ctxt->sax->reference(ctxt->userData, out);
7126 }
7127 } else {
7128 /*
7129 * Just encode the value in UTF-8
7130 */
7131 COPY_BUF(0 ,out, i, value);
7132 out[i] = 0;
7133 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7134 (!ctxt->disableSAX))
7135 ctxt->sax->characters(ctxt->userData, out, i);
7136 }
7137 return;
7138 }
7139
7140 /*
7141 * We are seeing an entity reference
7142 */
7143 ent = xmlParseEntityRef(ctxt);
7144 if (ent == NULL) return;
7145 if (!ctxt->wellFormed)
7146 return;
7147 was_checked = ent->checked;
7148
7149 /* special case of predefined entities */
7150 if ((ent->name == NULL) ||
7151 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7152 val = ent->content;
7153 if (val == NULL) return;
7154 /*
7155 * inline the entity.
7156 */
7157 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7158 (!ctxt->disableSAX))
7159 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7160 return;
7161 }
7162
7163 /*
7164 * The first reference to the entity trigger a parsing phase
7165 * where the ent->children is filled with the result from
7166 * the parsing.
7167 * Note: external parsed entities will not be loaded, it is not
7168 * required for a non-validating parser, unless the parsing option
7169 * of validating, or substituting entities were given. Doing so is
7170 * far more secure as the parser will only process data coming from
7171 * the document entity by default.
7172 */
7173 if (((ent->checked == 0) ||
7174 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7175 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7176 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7177 unsigned long oldnbent = ctxt->nbentities, diff;
7178
7179 /*
7180 * This is a bit hackish but this seems the best
7181 * way to make sure both SAX and DOM entity support
7182 * behaves okay.
7183 */
7184 void *user_data;
7185 if (ctxt->userData == ctxt)
7186 user_data = NULL;
7187 else
7188 user_data = ctxt->userData;
7189
7190 /*
7191 * Check that this entity is well formed
7192 * 4.3.2: An internal general parsed entity is well-formed
7193 * if its replacement text matches the production labeled
7194 * content.
7195 */
7196 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7197 ctxt->depth++;
7198 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7199 user_data, &list);
7200 ctxt->depth--;
7201
7202 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7203 ctxt->depth++;
7204 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7205 user_data, ctxt->depth, ent->URI,
7206 ent->ExternalID, &list);
7207 ctxt->depth--;
7208 } else {
7209 ret = XML_ERR_ENTITY_PE_INTERNAL;
7210 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7211 "invalid entity type found\n", NULL);
7212 }
7213
7214 /*
7215 * Store the number of entities needing parsing for this entity
7216 * content and do checkings
7217 */
7218 diff = ctxt->nbentities - oldnbent + 1;
7219 if (diff > INT_MAX / 2)
7220 diff = INT_MAX / 2;
7221 ent->checked = diff * 2;
7222 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7223 ent->checked |= 1;
7224 if (ret == XML_ERR_ENTITY_LOOP) {
7225 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7226 xmlHaltParser(ctxt);
7227 xmlFreeNodeList(list);
7228 return;
7229 }
7230 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7231 xmlFreeNodeList(list);
7232 return;
7233 }
7234
7235 if ((ret == XML_ERR_OK) && (list != NULL)) {
7236 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7237 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7238 (ent->children == NULL)) {
7239 ent->children = list;
7240 /*
7241 * Prune it directly in the generated document
7242 * except for single text nodes.
7243 */
7244 if ((ctxt->replaceEntities == 0) ||
7245 (ctxt->parseMode == XML_PARSE_READER) ||
7246 ((list->type == XML_TEXT_NODE) &&
7247 (list->next == NULL))) {
7248 ent->owner = 1;
7249 while (list != NULL) {
7250 list->parent = (xmlNodePtr) ent;
7251 xmlSetTreeDoc(list, ent->doc);
7252 if (list->next == NULL)
7253 ent->last = list;
7254 list = list->next;
7255 }
7256 list = NULL;
7257 } else {
7258 ent->owner = 0;
7259 while (list != NULL) {
7260 list->parent = (xmlNodePtr) ctxt->node;
7261 list->doc = ctxt->myDoc;
7262 if (list->next == NULL)
7263 ent->last = list;
7264 list = list->next;
7265 }
7266 list = ent->children;
7267 #ifdef LIBXML_LEGACY_ENABLED
7268 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7269 xmlAddEntityReference(ent, list, NULL);
7270 #endif /* LIBXML_LEGACY_ENABLED */
7271 }
7272 } else {
7273 xmlFreeNodeList(list);
7274 list = NULL;
7275 }
7276 } else if ((ret != XML_ERR_OK) &&
7277 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7278 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7279 "Entity '%s' failed to parse\n", ent->name);
7280 if (ent->content != NULL)
7281 ent->content[0] = 0;
7282 xmlParserEntityCheck(ctxt, 0, ent, 0);
7283 } else if (list != NULL) {
7284 xmlFreeNodeList(list);
7285 list = NULL;
7286 }
7287 if (ent->checked == 0)
7288 ent->checked = 2;
7289
7290 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7291 was_checked = 0;
7292 } else if (ent->checked != 1) {
7293 ctxt->nbentities += ent->checked / 2;
7294 }
7295
7296 /*
7297 * Now that the entity content has been gathered
7298 * provide it to the application, this can take different forms based
7299 * on the parsing modes.
7300 */
7301 if (ent->children == NULL) {
7302 /*
7303 * Probably running in SAX mode and the callbacks don't
7304 * build the entity content. So unless we already went
7305 * though parsing for first checking go though the entity
7306 * content to generate callbacks associated to the entity
7307 */
7308 if (was_checked != 0) {
7309 void *user_data;
7310 /*
7311 * This is a bit hackish but this seems the best
7312 * way to make sure both SAX and DOM entity support
7313 * behaves okay.
7314 */
7315 if (ctxt->userData == ctxt)
7316 user_data = NULL;
7317 else
7318 user_data = ctxt->userData;
7319
7320 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7321 ctxt->depth++;
7322 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7323 ent->content, user_data, NULL);
7324 ctxt->depth--;
7325 } else if (ent->etype ==
7326 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7327 ctxt->depth++;
7328 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7329 ctxt->sax, user_data, ctxt->depth,
7330 ent->URI, ent->ExternalID, NULL);
7331 ctxt->depth--;
7332 } else {
7333 ret = XML_ERR_ENTITY_PE_INTERNAL;
7334 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7335 "invalid entity type found\n", NULL);
7336 }
7337 if (ret == XML_ERR_ENTITY_LOOP) {
7338 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7339 return;
7340 }
7341 }
7342 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7343 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7344 /*
7345 * Entity reference callback comes second, it's somewhat
7346 * superfluous but a compatibility to historical behaviour
7347 */
7348 ctxt->sax->reference(ctxt->userData, ent->name);
7349 }
7350 return;
7351 }
7352
7353 /*
7354 * If we didn't get any children for the entity being built
7355 */
7356 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7357 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7358 /*
7359 * Create a node.
7360 */
7361 ctxt->sax->reference(ctxt->userData, ent->name);
7362 return;
7363 }
7364
7365 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7366 /*
7367 * There is a problem on the handling of _private for entities
7368 * (bug 155816): Should we copy the content of the field from
7369 * the entity (possibly overwriting some value set by the user
7370 * when a copy is created), should we leave it alone, or should
7371 * we try to take care of different situations? The problem
7372 * is exacerbated by the usage of this field by the xmlReader.
7373 * To fix this bug, we look at _private on the created node
7374 * and, if it's NULL, we copy in whatever was in the entity.
7375 * If it's not NULL we leave it alone. This is somewhat of a
7376 * hack - maybe we should have further tests to determine
7377 * what to do.
7378 */
7379 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7380 /*
7381 * Seems we are generating the DOM content, do
7382 * a simple tree copy for all references except the first
7383 * In the first occurrence list contains the replacement.
7384 */
7385 if (((list == NULL) && (ent->owner == 0)) ||
7386 (ctxt->parseMode == XML_PARSE_READER)) {
7387 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7388
7389 /*
7390 * We are copying here, make sure there is no abuse
7391 */
7392 ctxt->sizeentcopy += ent->length + 5;
7393 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7394 return;
7395
7396 /*
7397 * when operating on a reader, the entities definitions
7398 * are always owning the entities subtree.
7399 if (ctxt->parseMode == XML_PARSE_READER)
7400 ent->owner = 1;
7401 */
7402
7403 cur = ent->children;
7404 while (cur != NULL) {
7405 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7406 if (nw != NULL) {
7407 if (nw->_private == NULL)
7408 nw->_private = cur->_private;
7409 if (firstChild == NULL){
7410 firstChild = nw;
7411 }
7412 nw = xmlAddChild(ctxt->node, nw);
7413 }
7414 if (cur == ent->last) {
7415 /*
7416 * needed to detect some strange empty
7417 * node cases in the reader tests
7418 */
7419 if ((ctxt->parseMode == XML_PARSE_READER) &&
7420 (nw != NULL) &&
7421 (nw->type == XML_ELEMENT_NODE) &&
7422 (nw->children == NULL))
7423 nw->extra = 1;
7424
7425 break;
7426 }
7427 cur = cur->next;
7428 }
7429 #ifdef LIBXML_LEGACY_ENABLED
7430 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7431 xmlAddEntityReference(ent, firstChild, nw);
7432 #endif /* LIBXML_LEGACY_ENABLED */
7433 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7434 xmlNodePtr nw = NULL, cur, next, last,
7435 firstChild = NULL;
7436
7437 /*
7438 * We are copying here, make sure there is no abuse
7439 */
7440 ctxt->sizeentcopy += ent->length + 5;
7441 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7442 return;
7443
7444 /*
7445 * Copy the entity child list and make it the new
7446 * entity child list. The goal is to make sure any
7447 * ID or REF referenced will be the one from the
7448 * document content and not the entity copy.
7449 */
7450 cur = ent->children;
7451 ent->children = NULL;
7452 last = ent->last;
7453 ent->last = NULL;
7454 while (cur != NULL) {
7455 next = cur->next;
7456 cur->next = NULL;
7457 cur->parent = NULL;
7458 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7459 if (nw != NULL) {
7460 if (nw->_private == NULL)
7461 nw->_private = cur->_private;
7462 if (firstChild == NULL){
7463 firstChild = cur;
7464 }
7465 xmlAddChild((xmlNodePtr) ent, nw);
7466 xmlAddChild(ctxt->node, cur);
7467 }
7468 if (cur == last)
7469 break;
7470 cur = next;
7471 }
7472 if (ent->owner == 0)
7473 ent->owner = 1;
7474 #ifdef LIBXML_LEGACY_ENABLED
7475 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7476 xmlAddEntityReference(ent, firstChild, nw);
7477 #endif /* LIBXML_LEGACY_ENABLED */
7478 } else {
7479 const xmlChar *nbktext;
7480
7481 /*
7482 * the name change is to avoid coalescing of the
7483 * node with a possible previous text one which
7484 * would make ent->children a dangling pointer
7485 */
7486 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7487 -1);
7488 if (ent->children->type == XML_TEXT_NODE)
7489 ent->children->name = nbktext;
7490 if ((ent->last != ent->children) &&
7491 (ent->last->type == XML_TEXT_NODE))
7492 ent->last->name = nbktext;
7493 xmlAddChildList(ctxt->node, ent->children);
7494 }
7495
7496 /*
7497 * This is to avoid a nasty side effect, see
7498 * characters() in SAX.c
7499 */
7500 ctxt->nodemem = 0;
7501 ctxt->nodelen = 0;
7502 return;
7503 }
7504 }
7505 }
7506
7507 /**
7508 * xmlParseEntityRef:
7509 * @ctxt: an XML parser context
7510 *
7511 * parse ENTITY references declarations
7512 *
7513 * [68] EntityRef ::= '&' Name ';'
7514 *
7515 * [ WFC: Entity Declared ]
7516 * In a document without any DTD, a document with only an internal DTD
7517 * subset which contains no parameter entity references, or a document
7518 * with "standalone='yes'", the Name given in the entity reference
7519 * must match that in an entity declaration, except that well-formed
7520 * documents need not declare any of the following entities: amp, lt,
7521 * gt, apos, quot. The declaration of a parameter entity must precede
7522 * any reference to it. Similarly, the declaration of a general entity
7523 * must precede any reference to it which appears in a default value in an
7524 * attribute-list declaration. Note that if entities are declared in the
7525 * external subset or in external parameter entities, a non-validating
7526 * processor is not obligated to read and process their declarations;
7527 * for such documents, the rule that an entity must be declared is a
7528 * well-formedness constraint only if standalone='yes'.
7529 *
7530 * [ WFC: Parsed Entity ]
7531 * An entity reference must not contain the name of an unparsed entity
7532 *
7533 * Returns the xmlEntityPtr if found, or NULL otherwise.
7534 */
7535 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7536 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7537 const xmlChar *name;
7538 xmlEntityPtr ent = NULL;
7539
7540 GROW;
7541 if (ctxt->instate == XML_PARSER_EOF)
7542 return(NULL);
7543
7544 if (RAW != '&')
7545 return(NULL);
7546 NEXT;
7547 name = xmlParseName(ctxt);
7548 if (name == NULL) {
7549 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7550 "xmlParseEntityRef: no name\n");
7551 return(NULL);
7552 }
7553 if (RAW != ';') {
7554 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7555 return(NULL);
7556 }
7557 NEXT;
7558
7559 /*
7560 * Predefined entities override any extra definition
7561 */
7562 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7563 ent = xmlGetPredefinedEntity(name);
7564 if (ent != NULL)
7565 return(ent);
7566 }
7567
7568 /*
7569 * Increase the number of entity references parsed
7570 */
7571 ctxt->nbentities++;
7572
7573 /*
7574 * Ask first SAX for entity resolution, otherwise try the
7575 * entities which may have stored in the parser context.
7576 */
7577 if (ctxt->sax != NULL) {
7578 if (ctxt->sax->getEntity != NULL)
7579 ent = ctxt->sax->getEntity(ctxt->userData, name);
7580 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7581 (ctxt->options & XML_PARSE_OLDSAX))
7582 ent = xmlGetPredefinedEntity(name);
7583 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7584 (ctxt->userData==ctxt)) {
7585 ent = xmlSAX2GetEntity(ctxt, name);
7586 }
7587 }
7588 if (ctxt->instate == XML_PARSER_EOF)
7589 return(NULL);
7590 /*
7591 * [ WFC: Entity Declared ]
7592 * In a document without any DTD, a document with only an
7593 * internal DTD subset which contains no parameter entity
7594 * references, or a document with "standalone='yes'", the
7595 * Name given in the entity reference must match that in an
7596 * entity declaration, except that well-formed documents
7597 * need not declare any of the following entities: amp, lt,
7598 * gt, apos, quot.
7599 * The declaration of a parameter entity must precede any
7600 * reference to it.
7601 * Similarly, the declaration of a general entity must
7602 * precede any reference to it which appears in a default
7603 * value in an attribute-list declaration. Note that if
7604 * entities are declared in the external subset or in
7605 * external parameter entities, a non-validating processor
7606 * is not obligated to read and process their declarations;
7607 * for such documents, the rule that an entity must be
7608 * declared is a well-formedness constraint only if
7609 * standalone='yes'.
7610 */
7611 if (ent == NULL) {
7612 if ((ctxt->standalone == 1) ||
7613 ((ctxt->hasExternalSubset == 0) &&
7614 (ctxt->hasPErefs == 0))) {
7615 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7616 "Entity '%s' not defined\n", name);
7617 } else {
7618 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7619 "Entity '%s' not defined\n", name);
7620 if ((ctxt->inSubset == 0) &&
7621 (ctxt->sax != NULL) &&
7622 (ctxt->sax->reference != NULL)) {
7623 ctxt->sax->reference(ctxt->userData, name);
7624 }
7625 }
7626 xmlParserEntityCheck(ctxt, 0, ent, 0);
7627 ctxt->valid = 0;
7628 }
7629
7630 /*
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an
7633 * unparsed entity
7634 */
7635 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7636 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7637 "Entity reference to unparsed entity %s\n", name);
7638 }
7639
7640 /*
7641 * [ WFC: No External Entity References ]
7642 * Attribute values cannot contain direct or indirect
7643 * entity references to external entities.
7644 */
7645 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7646 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7647 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7648 "Attribute references external entity '%s'\n", name);
7649 }
7650 /*
7651 * [ WFC: No < in Attribute Values ]
7652 * The replacement text of any entity referred to directly or
7653 * indirectly in an attribute value (other than "<") must
7654 * not contain a <.
7655 */
7656 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7657 (ent != NULL) &&
7658 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7659 if (((ent->checked & 1) || (ent->checked == 0)) &&
7660 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7661 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7662 "'<' in entity '%s' is not allowed in attributes values\n", name);
7663 }
7664 }
7665
7666 /*
7667 * Internal check, no parameter entities here ...
7668 */
7669 else {
7670 switch (ent->etype) {
7671 case XML_INTERNAL_PARAMETER_ENTITY:
7672 case XML_EXTERNAL_PARAMETER_ENTITY:
7673 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7674 "Attempt to reference the parameter entity '%s'\n",
7675 name);
7676 break;
7677 default:
7678 break;
7679 }
7680 }
7681
7682 /*
7683 * [ WFC: No Recursion ]
7684 * A parsed entity must not contain a recursive reference
7685 * to itself, either directly or indirectly.
7686 * Done somewhere else
7687 */
7688 return(ent);
7689 }
7690
7691 /**
7692 * xmlParseStringEntityRef:
7693 * @ctxt: an XML parser context
7694 * @str: a pointer to an index in the string
7695 *
7696 * parse ENTITY references declarations, but this version parses it from
7697 * a string value.
7698 *
7699 * [68] EntityRef ::= '&' Name ';'
7700 *
7701 * [ WFC: Entity Declared ]
7702 * In a document without any DTD, a document with only an internal DTD
7703 * subset which contains no parameter entity references, or a document
7704 * with "standalone='yes'", the Name given in the entity reference
7705 * must match that in an entity declaration, except that well-formed
7706 * documents need not declare any of the following entities: amp, lt,
7707 * gt, apos, quot. The declaration of a parameter entity must precede
7708 * any reference to it. Similarly, the declaration of a general entity
7709 * must precede any reference to it which appears in a default value in an
7710 * attribute-list declaration. Note that if entities are declared in the
7711 * external subset or in external parameter entities, a non-validating
7712 * processor is not obligated to read and process their declarations;
7713 * for such documents, the rule that an entity must be declared is a
7714 * well-formedness constraint only if standalone='yes'.
7715 *
7716 * [ WFC: Parsed Entity ]
7717 * An entity reference must not contain the name of an unparsed entity
7718 *
7719 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7720 * is updated to the current location in the string.
7721 */
7722 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7723 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7724 xmlChar *name;
7725 const xmlChar *ptr;
7726 xmlChar cur;
7727 xmlEntityPtr ent = NULL;
7728
7729 if ((str == NULL) || (*str == NULL))
7730 return(NULL);
7731 ptr = *str;
7732 cur = *ptr;
7733 if (cur != '&')
7734 return(NULL);
7735
7736 ptr++;
7737 name = xmlParseStringName(ctxt, &ptr);
7738 if (name == NULL) {
7739 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7740 "xmlParseStringEntityRef: no name\n");
7741 *str = ptr;
7742 return(NULL);
7743 }
7744 if (*ptr != ';') {
7745 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7746 xmlFree(name);
7747 *str = ptr;
7748 return(NULL);
7749 }
7750 ptr++;
7751
7752
7753 /*
7754 * Predefined entities override any extra definition
7755 */
7756 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7757 ent = xmlGetPredefinedEntity(name);
7758 if (ent != NULL) {
7759 xmlFree(name);
7760 *str = ptr;
7761 return(ent);
7762 }
7763 }
7764
7765 /*
7766 * Increase the number of entity references parsed
7767 */
7768 ctxt->nbentities++;
7769
7770 /*
7771 * Ask first SAX for entity resolution, otherwise try the
7772 * entities which may have stored in the parser context.
7773 */
7774 if (ctxt->sax != NULL) {
7775 if (ctxt->sax->getEntity != NULL)
7776 ent = ctxt->sax->getEntity(ctxt->userData, name);
7777 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7778 ent = xmlGetPredefinedEntity(name);
7779 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7780 ent = xmlSAX2GetEntity(ctxt, name);
7781 }
7782 }
7783 if (ctxt->instate == XML_PARSER_EOF) {
7784 xmlFree(name);
7785 return(NULL);
7786 }
7787
7788 /*
7789 * [ WFC: Entity Declared ]
7790 * In a document without any DTD, a document with only an
7791 * internal DTD subset which contains no parameter entity
7792 * references, or a document with "standalone='yes'", the
7793 * Name given in the entity reference must match that in an
7794 * entity declaration, except that well-formed documents
7795 * need not declare any of the following entities: amp, lt,
7796 * gt, apos, quot.
7797 * The declaration of a parameter entity must precede any
7798 * reference to it.
7799 * Similarly, the declaration of a general entity must
7800 * precede any reference to it which appears in a default
7801 * value in an attribute-list declaration. Note that if
7802 * entities are declared in the external subset or in
7803 * external parameter entities, a non-validating processor
7804 * is not obligated to read and process their declarations;
7805 * for such documents, the rule that an entity must be
7806 * declared is a well-formedness constraint only if
7807 * standalone='yes'.
7808 */
7809 if (ent == NULL) {
7810 if ((ctxt->standalone == 1) ||
7811 ((ctxt->hasExternalSubset == 0) &&
7812 (ctxt->hasPErefs == 0))) {
7813 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7814 "Entity '%s' not defined\n", name);
7815 } else {
7816 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7817 "Entity '%s' not defined\n",
7818 name);
7819 }
7820 xmlParserEntityCheck(ctxt, 0, ent, 0);
7821 /* TODO ? check regressions ctxt->valid = 0; */
7822 }
7823
7824 /*
7825 * [ WFC: Parsed Entity ]
7826 * An entity reference must not contain the name of an
7827 * unparsed entity
7828 */
7829 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7830 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7831 "Entity reference to unparsed entity %s\n", name);
7832 }
7833
7834 /*
7835 * [ WFC: No External Entity References ]
7836 * Attribute values cannot contain direct or indirect
7837 * entity references to external entities.
7838 */
7839 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7840 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7841 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7842 "Attribute references external entity '%s'\n", name);
7843 }
7844 /*
7845 * [ WFC: No < in Attribute Values ]
7846 * The replacement text of any entity referred to directly or
7847 * indirectly in an attribute value (other than "<") must
7848 * not contain a <.
7849 */
7850 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7851 (ent != NULL) && (ent->content != NULL) &&
7852 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7853 (xmlStrchr(ent->content, '<'))) {
7854 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7855 "'<' in entity '%s' is not allowed in attributes values\n",
7856 name);
7857 }
7858
7859 /*
7860 * Internal check, no parameter entities here ...
7861 */
7862 else {
7863 switch (ent->etype) {
7864 case XML_INTERNAL_PARAMETER_ENTITY:
7865 case XML_EXTERNAL_PARAMETER_ENTITY:
7866 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7867 "Attempt to reference the parameter entity '%s'\n",
7868 name);
7869 break;
7870 default:
7871 break;
7872 }
7873 }
7874
7875 /*
7876 * [ WFC: No Recursion ]
7877 * A parsed entity must not contain a recursive reference
7878 * to itself, either directly or indirectly.
7879 * Done somewhere else
7880 */
7881
7882 xmlFree(name);
7883 *str = ptr;
7884 return(ent);
7885 }
7886
7887 /**
7888 * xmlParsePEReference:
7889 * @ctxt: an XML parser context
7890 *
7891 * parse PEReference declarations
7892 * The entity content is handled directly by pushing it's content as
7893 * a new input stream.
7894 *
7895 * [69] PEReference ::= '%' Name ';'
7896 *
7897 * [ WFC: No Recursion ]
7898 * A parsed entity must not contain a recursive
7899 * reference to itself, either directly or indirectly.
7900 *
7901 * [ WFC: Entity Declared ]
7902 * In a document without any DTD, a document with only an internal DTD
7903 * subset which contains no parameter entity references, or a document
7904 * with "standalone='yes'", ... ... The declaration of a parameter
7905 * entity must precede any reference to it...
7906 *
7907 * [ VC: Entity Declared ]
7908 * In a document with an external subset or external parameter entities
7909 * with "standalone='no'", ... ... The declaration of a parameter entity
7910 * must precede any reference to it...
7911 *
7912 * [ WFC: In DTD ]
7913 * Parameter-entity references may only appear in the DTD.
7914 * NOTE: misleading but this is handled.
7915 */
7916 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7917 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7918 {
7919 const xmlChar *name;
7920 xmlEntityPtr entity = NULL;
7921 xmlParserInputPtr input;
7922
7923 if (RAW != '%')
7924 return;
7925 NEXT;
7926 name = xmlParseName(ctxt);
7927 if (name == NULL) {
7928 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7929 return;
7930 }
7931 if (xmlParserDebugEntities)
7932 xmlGenericError(xmlGenericErrorContext,
7933 "PEReference: %s\n", name);
7934 if (RAW != ';') {
7935 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7936 return;
7937 }
7938
7939 NEXT;
7940
7941 /*
7942 * Increase the number of entity references parsed
7943 */
7944 ctxt->nbentities++;
7945
7946 /*
7947 * Request the entity from SAX
7948 */
7949 if ((ctxt->sax != NULL) &&
7950 (ctxt->sax->getParameterEntity != NULL))
7951 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7952 if (ctxt->instate == XML_PARSER_EOF)
7953 return;
7954 if (entity == NULL) {
7955 /*
7956 * [ WFC: Entity Declared ]
7957 * In a document without any DTD, a document with only an
7958 * internal DTD subset which contains no parameter entity
7959 * references, or a document with "standalone='yes'", ...
7960 * ... The declaration of a parameter entity must precede
7961 * any reference to it...
7962 */
7963 if ((ctxt->standalone == 1) ||
7964 ((ctxt->hasExternalSubset == 0) &&
7965 (ctxt->hasPErefs == 0))) {
7966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7967 "PEReference: %%%s; not found\n",
7968 name);
7969 } else {
7970 /*
7971 * [ VC: Entity Declared ]
7972 * In a document with an external subset or external
7973 * parameter entities with "standalone='no'", ...
7974 * ... The declaration of a parameter entity must
7975 * precede any reference to it...
7976 */
7977 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7978 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7979 "PEReference: %%%s; not found\n",
7980 name, NULL);
7981 } else
7982 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 "PEReference: %%%s; not found\n",
7984 name, NULL);
7985 ctxt->valid = 0;
7986 }
7987 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7988 } else {
7989 /*
7990 * Internal checking in case the entity quest barfed
7991 */
7992 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7993 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7994 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7995 "Internal: %%%s; is not a parameter entity\n",
7996 name, NULL);
7997 } else {
7998 xmlChar start[4];
7999 xmlCharEncoding enc;
8000
8001 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8002 return;
8003
8004 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8005 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8006 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8007 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8008 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8009 (ctxt->replaceEntities == 0) &&
8010 (ctxt->validate == 0))
8011 return;
8012
8013 input = xmlNewEntityInputStream(ctxt, entity);
8014 if (xmlPushInput(ctxt, input) < 0) {
8015 xmlFreeInputStream(input);
8016 return;
8017 }
8018
8019 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8020 /*
8021 * Get the 4 first bytes and decode the charset
8022 * if enc != XML_CHAR_ENCODING_NONE
8023 * plug some encoding conversion routines.
8024 * Note that, since we may have some non-UTF8
8025 * encoding (like UTF16, bug 135229), the 'length'
8026 * is not known, but we can calculate based upon
8027 * the amount of data in the buffer.
8028 */
8029 GROW
8030 if (ctxt->instate == XML_PARSER_EOF)
8031 return;
8032 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8033 start[0] = RAW;
8034 start[1] = NXT(1);
8035 start[2] = NXT(2);
8036 start[3] = NXT(3);
8037 enc = xmlDetectCharEncoding(start, 4);
8038 if (enc != XML_CHAR_ENCODING_NONE) {
8039 xmlSwitchEncoding(ctxt, enc);
8040 }
8041 }
8042
8043 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8044 (IS_BLANK_CH(NXT(5)))) {
8045 xmlParseTextDecl(ctxt);
8046 }
8047 }
8048 }
8049 }
8050 ctxt->hasPErefs = 1;
8051 }
8052
8053 /**
8054 * xmlLoadEntityContent:
8055 * @ctxt: an XML parser context
8056 * @entity: an unloaded system entity
8057 *
8058 * Load the original content of the given system entity from the
8059 * ExternalID/SystemID given. This is to be used for Included in Literal
8060 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8061 *
8062 * Returns 0 in case of success and -1 in case of failure
8063 */
8064 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8065 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8066 xmlParserInputPtr input;
8067 xmlBufferPtr buf;
8068 int l, c;
8069 int count = 0;
8070
8071 if ((ctxt == NULL) || (entity == NULL) ||
8072 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8073 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8074 (entity->content != NULL)) {
8075 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8076 "xmlLoadEntityContent parameter error");
8077 return(-1);
8078 }
8079
8080 if (xmlParserDebugEntities)
8081 xmlGenericError(xmlGenericErrorContext,
8082 "Reading %s entity content input\n", entity->name);
8083
8084 buf = xmlBufferCreate();
8085 if (buf == NULL) {
8086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8087 "xmlLoadEntityContent parameter error");
8088 return(-1);
8089 }
8090
8091 input = xmlNewEntityInputStream(ctxt, entity);
8092 if (input == NULL) {
8093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8094 "xmlLoadEntityContent input error");
8095 xmlBufferFree(buf);
8096 return(-1);
8097 }
8098
8099 /*
8100 * Push the entity as the current input, read char by char
8101 * saving to the buffer until the end of the entity or an error
8102 */
8103 if (xmlPushInput(ctxt, input) < 0) {
8104 xmlBufferFree(buf);
8105 return(-1);
8106 }
8107
8108 GROW;
8109 c = CUR_CHAR(l);
8110 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8111 (IS_CHAR(c))) {
8112 xmlBufferAdd(buf, ctxt->input->cur, l);
8113 if (count++ > XML_PARSER_CHUNK_SIZE) {
8114 count = 0;
8115 GROW;
8116 if (ctxt->instate == XML_PARSER_EOF) {
8117 xmlBufferFree(buf);
8118 return(-1);
8119 }
8120 }
8121 NEXTL(l);
8122 c = CUR_CHAR(l);
8123 if (c == 0) {
8124 count = 0;
8125 GROW;
8126 if (ctxt->instate == XML_PARSER_EOF) {
8127 xmlBufferFree(buf);
8128 return(-1);
8129 }
8130 c = CUR_CHAR(l);
8131 }
8132 }
8133
8134 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8135 xmlPopInput(ctxt);
8136 } else if (!IS_CHAR(c)) {
8137 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8138 "xmlLoadEntityContent: invalid char value %d\n",
8139 c);
8140 xmlBufferFree(buf);
8141 return(-1);
8142 }
8143 entity->content = buf->content;
8144 buf->content = NULL;
8145 xmlBufferFree(buf);
8146
8147 return(0);
8148 }
8149
8150 /**
8151 * xmlParseStringPEReference:
8152 * @ctxt: an XML parser context
8153 * @str: a pointer to an index in the string
8154 *
8155 * parse PEReference declarations
8156 *
8157 * [69] PEReference ::= '%' Name ';'
8158 *
8159 * [ WFC: No Recursion ]
8160 * A parsed entity must not contain a recursive
8161 * reference to itself, either directly or indirectly.
8162 *
8163 * [ WFC: Entity Declared ]
8164 * In a document without any DTD, a document with only an internal DTD
8165 * subset which contains no parameter entity references, or a document
8166 * with "standalone='yes'", ... ... The declaration of a parameter
8167 * entity must precede any reference to it...
8168 *
8169 * [ VC: Entity Declared ]
8170 * In a document with an external subset or external parameter entities
8171 * with "standalone='no'", ... ... The declaration of a parameter entity
8172 * must precede any reference to it...
8173 *
8174 * [ WFC: In DTD ]
8175 * Parameter-entity references may only appear in the DTD.
8176 * NOTE: misleading but this is handled.
8177 *
8178 * Returns the string of the entity content.
8179 * str is updated to the current value of the index
8180 */
8181 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8182 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8183 const xmlChar *ptr;
8184 xmlChar cur;
8185 xmlChar *name;
8186 xmlEntityPtr entity = NULL;
8187
8188 if ((str == NULL) || (*str == NULL)) return(NULL);
8189 ptr = *str;
8190 cur = *ptr;
8191 if (cur != '%')
8192 return(NULL);
8193 ptr++;
8194 name = xmlParseStringName(ctxt, &ptr);
8195 if (name == NULL) {
8196 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8197 "xmlParseStringPEReference: no name\n");
8198 *str = ptr;
8199 return(NULL);
8200 }
8201 cur = *ptr;
8202 if (cur != ';') {
8203 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8204 xmlFree(name);
8205 *str = ptr;
8206 return(NULL);
8207 }
8208 ptr++;
8209
8210 /*
8211 * Increase the number of entity references parsed
8212 */
8213 ctxt->nbentities++;
8214
8215 /*
8216 * Request the entity from SAX
8217 */
8218 if ((ctxt->sax != NULL) &&
8219 (ctxt->sax->getParameterEntity != NULL))
8220 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8221 if (ctxt->instate == XML_PARSER_EOF) {
8222 xmlFree(name);
8223 *str = ptr;
8224 return(NULL);
8225 }
8226 if (entity == NULL) {
8227 /*
8228 * [ WFC: Entity Declared ]
8229 * In a document without any DTD, a document with only an
8230 * internal DTD subset which contains no parameter entity
8231 * references, or a document with "standalone='yes'", ...
8232 * ... The declaration of a parameter entity must precede
8233 * any reference to it...
8234 */
8235 if ((ctxt->standalone == 1) ||
8236 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8237 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8238 "PEReference: %%%s; not found\n", name);
8239 } else {
8240 /*
8241 * [ VC: Entity Declared ]
8242 * In a document with an external subset or external
8243 * parameter entities with "standalone='no'", ...
8244 * ... The declaration of a parameter entity must
8245 * precede any reference to it...
8246 */
8247 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248 "PEReference: %%%s; not found\n",
8249 name, NULL);
8250 ctxt->valid = 0;
8251 }
8252 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8253 } else {
8254 /*
8255 * Internal checking in case the entity quest barfed
8256 */
8257 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8258 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8259 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8260 "%%%s; is not a parameter entity\n",
8261 name, NULL);
8262 }
8263 }
8264 ctxt->hasPErefs = 1;
8265 xmlFree(name);
8266 *str = ptr;
8267 return(entity);
8268 }
8269
8270 /**
8271 * xmlParseDocTypeDecl:
8272 * @ctxt: an XML parser context
8273 *
8274 * parse a DOCTYPE declaration
8275 *
8276 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8277 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8278 *
8279 * [ VC: Root Element Type ]
8280 * The Name in the document type declaration must match the element
8281 * type of the root element.
8282 */
8283
8284 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8285 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8286 const xmlChar *name = NULL;
8287 xmlChar *ExternalID = NULL;
8288 xmlChar *URI = NULL;
8289
8290 /*
8291 * We know that '<!DOCTYPE' has been detected.
8292 */
8293 SKIP(9);
8294
8295 SKIP_BLANKS;
8296
8297 /*
8298 * Parse the DOCTYPE name.
8299 */
8300 name = xmlParseName(ctxt);
8301 if (name == NULL) {
8302 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8303 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8304 }
8305 ctxt->intSubName = name;
8306
8307 SKIP_BLANKS;
8308
8309 /*
8310 * Check for SystemID and ExternalID
8311 */
8312 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8313
8314 if ((URI != NULL) || (ExternalID != NULL)) {
8315 ctxt->hasExternalSubset = 1;
8316 }
8317 ctxt->extSubURI = URI;
8318 ctxt->extSubSystem = ExternalID;
8319
8320 SKIP_BLANKS;
8321
8322 /*
8323 * Create and update the internal subset.
8324 */
8325 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8326 (!ctxt->disableSAX))
8327 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8328 if (ctxt->instate == XML_PARSER_EOF)
8329 return;
8330
8331 /*
8332 * Is there any internal subset declarations ?
8333 * they are handled separately in xmlParseInternalSubset()
8334 */
8335 if (RAW == '[')
8336 return;
8337
8338 /*
8339 * We should be at the end of the DOCTYPE declaration.
8340 */
8341 if (RAW != '>') {
8342 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8343 }
8344 NEXT;
8345 }
8346
8347 /**
8348 * xmlParseInternalSubset:
8349 * @ctxt: an XML parser context
8350 *
8351 * parse the internal subset declaration
8352 *
8353 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8354 */
8355
8356 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8357 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8358 /*
8359 * Is there any DTD definition ?
8360 */
8361 if (RAW == '[') {
8362 int baseInputNr = ctxt->inputNr;
8363 ctxt->instate = XML_PARSER_DTD;
8364 NEXT;
8365 /*
8366 * Parse the succession of Markup declarations and
8367 * PEReferences.
8368 * Subsequence (markupdecl | PEReference | S)*
8369 */
8370 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8371 (ctxt->instate != XML_PARSER_EOF)) {
8372 const xmlChar *check = CUR_PTR;
8373 unsigned int cons = ctxt->input->consumed;
8374
8375 SKIP_BLANKS;
8376 xmlParseMarkupDecl(ctxt);
8377 xmlParsePEReference(ctxt);
8378
8379 /*
8380 * Conditional sections are allowed from external entities included
8381 * by PE References in the internal subset.
8382 */
8383 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8384 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8385 xmlParseConditionalSections(ctxt);
8386 }
8387
8388 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8389 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8390 "xmlParseInternalSubset: error detected in Markup declaration\n");
8391 if (ctxt->inputNr > baseInputNr)
8392 xmlPopInput(ctxt);
8393 else
8394 break;
8395 }
8396 }
8397 if (RAW == ']') {
8398 NEXT;
8399 SKIP_BLANKS;
8400 }
8401 }
8402
8403 /*
8404 * We should be at the end of the DOCTYPE declaration.
8405 */
8406 if (RAW != '>') {
8407 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8408 return;
8409 }
8410 NEXT;
8411 }
8412
8413 #ifdef LIBXML_SAX1_ENABLED
8414 /**
8415 * xmlParseAttribute:
8416 * @ctxt: an XML parser context
8417 * @value: a xmlChar ** used to store the value of the attribute
8418 *
8419 * parse an attribute
8420 *
8421 * [41] Attribute ::= Name Eq AttValue
8422 *
8423 * [ WFC: No External Entity References ]
8424 * Attribute values cannot contain direct or indirect entity references
8425 * to external entities.
8426 *
8427 * [ WFC: No < in Attribute Values ]
8428 * The replacement text of any entity referred to directly or indirectly in
8429 * an attribute value (other than "<") must not contain a <.
8430 *
8431 * [ VC: Attribute Value Type ]
8432 * The attribute must have been declared; the value must be of the type
8433 * declared for it.
8434 *
8435 * [25] Eq ::= S? '=' S?
8436 *
8437 * With namespace:
8438 *
8439 * [NS 11] Attribute ::= QName Eq AttValue
8440 *
8441 * Also the case QName == xmlns:??? is handled independently as a namespace
8442 * definition.
8443 *
8444 * Returns the attribute name, and the value in *value.
8445 */
8446
8447 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8448 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8449 const xmlChar *name;
8450 xmlChar *val;
8451
8452 *value = NULL;
8453 GROW;
8454 name = xmlParseName(ctxt);
8455 if (name == NULL) {
8456 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8457 "error parsing attribute name\n");
8458 return(NULL);
8459 }
8460
8461 /*
8462 * read the value
8463 */
8464 SKIP_BLANKS;
8465 if (RAW == '=') {
8466 NEXT;
8467 SKIP_BLANKS;
8468 val = xmlParseAttValue(ctxt);
8469 ctxt->instate = XML_PARSER_CONTENT;
8470 } else {
8471 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8472 "Specification mandates value for attribute %s\n", name);
8473 return(NULL);
8474 }
8475
8476 /*
8477 * Check that xml:lang conforms to the specification
8478 * No more registered as an error, just generate a warning now
8479 * since this was deprecated in XML second edition
8480 */
8481 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8482 if (!xmlCheckLanguageID(val)) {
8483 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8484 "Malformed value for xml:lang : %s\n",
8485 val, NULL);
8486 }
8487 }
8488
8489 /*
8490 * Check that xml:space conforms to the specification
8491 */
8492 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8493 if (xmlStrEqual(val, BAD_CAST "default"))
8494 *(ctxt->space) = 0;
8495 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8496 *(ctxt->space) = 1;
8497 else {
8498 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8499 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8500 val, NULL);
8501 }
8502 }
8503
8504 *value = val;
8505 return(name);
8506 }
8507
8508 /**
8509 * xmlParseStartTag:
8510 * @ctxt: an XML parser context
8511 *
8512 * parse a start of tag either for rule element or
8513 * EmptyElement. In both case we don't parse the tag closing chars.
8514 *
8515 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8516 *
8517 * [ WFC: Unique Att Spec ]
8518 * No attribute name may appear more than once in the same start-tag or
8519 * empty-element tag.
8520 *
8521 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8522 *
8523 * [ WFC: Unique Att Spec ]
8524 * No attribute name may appear more than once in the same start-tag or
8525 * empty-element tag.
8526 *
8527 * With namespace:
8528 *
8529 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8530 *
8531 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8532 *
8533 * Returns the element name parsed
8534 */
8535
8536 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8537 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8538 const xmlChar *name;
8539 const xmlChar *attname;
8540 xmlChar *attvalue;
8541 const xmlChar **atts = ctxt->atts;
8542 int nbatts = 0;
8543 int maxatts = ctxt->maxatts;
8544 int i;
8545
8546 if (RAW != '<') return(NULL);
8547 NEXT1;
8548
8549 name = xmlParseName(ctxt);
8550 if (name == NULL) {
8551 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8552 "xmlParseStartTag: invalid element name\n");
8553 return(NULL);
8554 }
8555
8556 /*
8557 * Now parse the attributes, it ends up with the ending
8558 *
8559 * (S Attribute)* S?
8560 */
8561 SKIP_BLANKS;
8562 GROW;
8563
8564 while (((RAW != '>') &&
8565 ((RAW != '/') || (NXT(1) != '>')) &&
8566 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8567 const xmlChar *q = CUR_PTR;
8568 unsigned int cons = ctxt->input->consumed;
8569
8570 attname = xmlParseAttribute(ctxt, &attvalue);
8571 if ((attname != NULL) && (attvalue != NULL)) {
8572 /*
8573 * [ WFC: Unique Att Spec ]
8574 * No attribute name may appear more than once in the same
8575 * start-tag or empty-element tag.
8576 */
8577 for (i = 0; i < nbatts;i += 2) {
8578 if (xmlStrEqual(atts[i], attname)) {
8579 xmlErrAttributeDup(ctxt, NULL, attname);
8580 xmlFree(attvalue);
8581 goto failed;
8582 }
8583 }
8584 /*
8585 * Add the pair to atts
8586 */
8587 if (atts == NULL) {
8588 maxatts = 22; /* allow for 10 attrs by default */
8589 atts = (const xmlChar **)
8590 xmlMalloc(maxatts * sizeof(xmlChar *));
8591 if (atts == NULL) {
8592 xmlErrMemory(ctxt, NULL);
8593 if (attvalue != NULL)
8594 xmlFree(attvalue);
8595 goto failed;
8596 }
8597 ctxt->atts = atts;
8598 ctxt->maxatts = maxatts;
8599 } else if (nbatts + 4 > maxatts) {
8600 const xmlChar **n;
8601
8602 maxatts *= 2;
8603 n = (const xmlChar **) xmlRealloc((void *) atts,
8604 maxatts * sizeof(const xmlChar *));
8605 if (n == NULL) {
8606 xmlErrMemory(ctxt, NULL);
8607 if (attvalue != NULL)
8608 xmlFree(attvalue);
8609 goto failed;
8610 }
8611 atts = n;
8612 ctxt->atts = atts;
8613 ctxt->maxatts = maxatts;
8614 }
8615 atts[nbatts++] = attname;
8616 atts[nbatts++] = attvalue;
8617 atts[nbatts] = NULL;
8618 atts[nbatts + 1] = NULL;
8619 } else {
8620 if (attvalue != NULL)
8621 xmlFree(attvalue);
8622 }
8623
8624 failed:
8625
8626 GROW
8627 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8628 break;
8629 if (SKIP_BLANKS == 0) {
8630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8631 "attributes construct error\n");
8632 }
8633 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8634 (attname == NULL) && (attvalue == NULL)) {
8635 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8636 "xmlParseStartTag: problem parsing attributes\n");
8637 break;
8638 }
8639 SHRINK;
8640 GROW;
8641 }
8642
8643 /*
8644 * SAX: Start of Element !
8645 */
8646 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8647 (!ctxt->disableSAX)) {
8648 if (nbatts > 0)
8649 ctxt->sax->startElement(ctxt->userData, name, atts);
8650 else
8651 ctxt->sax->startElement(ctxt->userData, name, NULL);
8652 }
8653
8654 if (atts != NULL) {
8655 /* Free only the content strings */
8656 for (i = 1;i < nbatts;i+=2)
8657 if (atts[i] != NULL)
8658 xmlFree((xmlChar *) atts[i]);
8659 }
8660 return(name);
8661 }
8662
8663 /**
8664 * xmlParseEndTag1:
8665 * @ctxt: an XML parser context
8666 * @line: line of the start tag
8667 * @nsNr: number of namespaces on the start tag
8668 *
8669 * parse an end of tag
8670 *
8671 * [42] ETag ::= '</' Name S? '>'
8672 *
8673 * With namespace
8674 *
8675 * [NS 9] ETag ::= '</' QName S? '>'
8676 */
8677
8678 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8679 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8680 const xmlChar *name;
8681
8682 GROW;
8683 if ((RAW != '<') || (NXT(1) != '/')) {
8684 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8685 "xmlParseEndTag: '</' not found\n");
8686 return;
8687 }
8688 SKIP(2);
8689
8690 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8691
8692 /*
8693 * We should definitely be at the ending "S? '>'" part
8694 */
8695 GROW;
8696 SKIP_BLANKS;
8697 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8698 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8699 } else
8700 NEXT1;
8701
8702 /*
8703 * [ WFC: Element Type Match ]
8704 * The Name in an element's end-tag must match the element type in the
8705 * start-tag.
8706 *
8707 */
8708 if (name != (xmlChar*)1) {
8709 if (name == NULL) name = BAD_CAST "unparsable";
8710 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8711 "Opening and ending tag mismatch: %s line %d and %s\n",
8712 ctxt->name, line, name);
8713 }
8714
8715 /*
8716 * SAX: End of Tag
8717 */
8718 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8719 (!ctxt->disableSAX))
8720 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8721
8722 namePop(ctxt);
8723 spacePop(ctxt);
8724 return;
8725 }
8726
8727 /**
8728 * xmlParseEndTag:
8729 * @ctxt: an XML parser context
8730 *
8731 * parse an end of tag
8732 *
8733 * [42] ETag ::= '</' Name S? '>'
8734 *
8735 * With namespace
8736 *
8737 * [NS 9] ETag ::= '</' QName S? '>'
8738 */
8739
8740 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8741 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8742 xmlParseEndTag1(ctxt, 0);
8743 }
8744 #endif /* LIBXML_SAX1_ENABLED */
8745
8746 /************************************************************************
8747 * *
8748 * SAX 2 specific operations *
8749 * *
8750 ************************************************************************/
8751
8752 /*
8753 * xmlGetNamespace:
8754 * @ctxt: an XML parser context
8755 * @prefix: the prefix to lookup
8756 *
8757 * Lookup the namespace name for the @prefix (which ca be NULL)
8758 * The prefix must come from the @ctxt->dict dictionary
8759 *
8760 * Returns the namespace name or NULL if not bound
8761 */
8762 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8763 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8764 int i;
8765
8766 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8767 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8768 if (ctxt->nsTab[i] == prefix) {
8769 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8770 return(NULL);
8771 return(ctxt->nsTab[i + 1]);
8772 }
8773 return(NULL);
8774 }
8775
8776 /**
8777 * xmlParseQName:
8778 * @ctxt: an XML parser context
8779 * @prefix: pointer to store the prefix part
8780 *
8781 * parse an XML Namespace QName
8782 *
8783 * [6] QName ::= (Prefix ':')? LocalPart
8784 * [7] Prefix ::= NCName
8785 * [8] LocalPart ::= NCName
8786 *
8787 * Returns the Name parsed or NULL
8788 */
8789
8790 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8791 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8792 const xmlChar *l, *p;
8793
8794 GROW;
8795
8796 l = xmlParseNCName(ctxt);
8797 if (l == NULL) {
8798 if (CUR == ':') {
8799 l = xmlParseName(ctxt);
8800 if (l != NULL) {
8801 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8802 "Failed to parse QName '%s'\n", l, NULL, NULL);
8803 *prefix = NULL;
8804 return(l);
8805 }
8806 }
8807 return(NULL);
8808 }
8809 if (CUR == ':') {
8810 NEXT;
8811 p = l;
8812 l = xmlParseNCName(ctxt);
8813 if (l == NULL) {
8814 xmlChar *tmp;
8815
8816 if (ctxt->instate == XML_PARSER_EOF)
8817 return(NULL);
8818 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8819 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8820 l = xmlParseNmtoken(ctxt);
8821 if (l == NULL) {
8822 if (ctxt->instate == XML_PARSER_EOF)
8823 return(NULL);
8824 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8825 } else {
8826 tmp = xmlBuildQName(l, p, NULL, 0);
8827 xmlFree((char *)l);
8828 }
8829 p = xmlDictLookup(ctxt->dict, tmp, -1);
8830 if (tmp != NULL) xmlFree(tmp);
8831 *prefix = NULL;
8832 return(p);
8833 }
8834 if (CUR == ':') {
8835 xmlChar *tmp;
8836
8837 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8838 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8839 NEXT;
8840 tmp = (xmlChar *) xmlParseName(ctxt);
8841 if (tmp != NULL) {
8842 tmp = xmlBuildQName(tmp, l, NULL, 0);
8843 l = xmlDictLookup(ctxt->dict, tmp, -1);
8844 if (tmp != NULL) xmlFree(tmp);
8845 *prefix = p;
8846 return(l);
8847 }
8848 if (ctxt->instate == XML_PARSER_EOF)
8849 return(NULL);
8850 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8851 l = xmlDictLookup(ctxt->dict, tmp, -1);
8852 if (tmp != NULL) xmlFree(tmp);
8853 *prefix = p;
8854 return(l);
8855 }
8856 *prefix = p;
8857 } else
8858 *prefix = NULL;
8859 return(l);
8860 }
8861
8862 /**
8863 * xmlParseQNameAndCompare:
8864 * @ctxt: an XML parser context
8865 * @name: the localname
8866 * @prefix: the prefix, if any.
8867 *
8868 * parse an XML name and compares for match
8869 * (specialized for endtag parsing)
8870 *
8871 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8872 * and the name for mismatch
8873 */
8874
8875 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8876 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8877 xmlChar const *prefix) {
8878 const xmlChar *cmp;
8879 const xmlChar *in;
8880 const xmlChar *ret;
8881 const xmlChar *prefix2;
8882
8883 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8884
8885 GROW;
8886 in = ctxt->input->cur;
8887
8888 cmp = prefix;
8889 while (*in != 0 && *in == *cmp) {
8890 ++in;
8891 ++cmp;
8892 }
8893 if ((*cmp == 0) && (*in == ':')) {
8894 in++;
8895 cmp = name;
8896 while (*in != 0 && *in == *cmp) {
8897 ++in;
8898 ++cmp;
8899 }
8900 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8901 /* success */
8902 ctxt->input->col += in - ctxt->input->cur;
8903 ctxt->input->cur = in;
8904 return((const xmlChar*) 1);
8905 }
8906 }
8907 /*
8908 * all strings coms from the dictionary, equality can be done directly
8909 */
8910 ret = xmlParseQName (ctxt, &prefix2);
8911 if ((ret == name) && (prefix == prefix2))
8912 return((const xmlChar*) 1);
8913 return ret;
8914 }
8915
8916 /**
8917 * xmlParseAttValueInternal:
8918 * @ctxt: an XML parser context
8919 * @len: attribute len result
8920 * @alloc: whether the attribute was reallocated as a new string
8921 * @normalize: if 1 then further non-CDATA normalization must be done
8922 *
8923 * parse a value for an attribute.
8924 * NOTE: if no normalization is needed, the routine will return pointers
8925 * directly from the data buffer.
8926 *
8927 * 3.3.3 Attribute-Value Normalization:
8928 * Before the value of an attribute is passed to the application or
8929 * checked for validity, the XML processor must normalize it as follows:
8930 * - a character reference is processed by appending the referenced
8931 * character to the attribute value
8932 * - an entity reference is processed by recursively processing the
8933 * replacement text of the entity
8934 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8935 * appending #x20 to the normalized value, except that only a single
8936 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8937 * parsed entity or the literal entity value of an internal parsed entity
8938 * - other characters are processed by appending them to the normalized value
8939 * If the declared value is not CDATA, then the XML processor must further
8940 * process the normalized attribute value by discarding any leading and
8941 * trailing space (#x20) characters, and by replacing sequences of space
8942 * (#x20) characters by a single space (#x20) character.
8943 * All attributes for which no declaration has been read should be treated
8944 * by a non-validating parser as if declared CDATA.
8945 *
8946 * Returns the AttValue parsed or NULL. The value has to be freed by the
8947 * caller if it was copied, this can be detected by val[*len] == 0.
8948 */
8949
8950 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8951 const xmlChar *oldbase = ctxt->input->base;\
8952 GROW;\
8953 if (ctxt->instate == XML_PARSER_EOF)\
8954 return(NULL);\
8955 if (oldbase != ctxt->input->base) {\
8956 ptrdiff_t delta = ctxt->input->base - oldbase;\
8957 start = start + delta;\
8958 in = in + delta;\
8959 }\
8960 end = ctxt->input->end;
8961
8962 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8963 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8964 int normalize)
8965 {
8966 xmlChar limit = 0;
8967 const xmlChar *in = NULL, *start, *end, *last;
8968 xmlChar *ret = NULL;
8969 int line, col;
8970 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8971 XML_MAX_HUGE_LENGTH :
8972 XML_MAX_TEXT_LENGTH;
8973
8974 GROW;
8975 in = (xmlChar *) CUR_PTR;
8976 line = ctxt->input->line;
8977 col = ctxt->input->col;
8978 if (*in != '"' && *in != '\'') {
8979 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8980 return (NULL);
8981 }
8982 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8983
8984 /*
8985 * try to handle in this routine the most common case where no
8986 * allocation of a new string is required and where content is
8987 * pure ASCII.
8988 */
8989 limit = *in++;
8990 col++;
8991 end = ctxt->input->end;
8992 start = in;
8993 if (in >= end) {
8994 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8995 }
8996 if (normalize) {
8997 /*
8998 * Skip any leading spaces
8999 */
9000 while ((in < end) && (*in != limit) &&
9001 ((*in == 0x20) || (*in == 0x9) ||
9002 (*in == 0xA) || (*in == 0xD))) {
9003 if (*in == 0xA) {
9004 line++; col = 1;
9005 } else {
9006 col++;
9007 }
9008 in++;
9009 start = in;
9010 if (in >= end) {
9011 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9012 if ((in - start) > maxLength) {
9013 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9014 "AttValue length too long\n");
9015 return(NULL);
9016 }
9017 }
9018 }
9019 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9020 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9021 col++;
9022 if ((*in++ == 0x20) && (*in == 0x20)) break;
9023 if (in >= end) {
9024 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9025 if ((in - start) > maxLength) {
9026 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9027 "AttValue length too long\n");
9028 return(NULL);
9029 }
9030 }
9031 }
9032 last = in;
9033 /*
9034 * skip the trailing blanks
9035 */
9036 while ((last[-1] == 0x20) && (last > start)) last--;
9037 while ((in < end) && (*in != limit) &&
9038 ((*in == 0x20) || (*in == 0x9) ||
9039 (*in == 0xA) || (*in == 0xD))) {
9040 if (*in == 0xA) {
9041 line++, col = 1;
9042 } else {
9043 col++;
9044 }
9045 in++;
9046 if (in >= end) {
9047 const xmlChar *oldbase = ctxt->input->base;
9048 GROW;
9049 if (ctxt->instate == XML_PARSER_EOF)
9050 return(NULL);
9051 if (oldbase != ctxt->input->base) {
9052 ptrdiff_t delta = ctxt->input->base - oldbase;
9053 start = start + delta;
9054 in = in + delta;
9055 last = last + delta;
9056 }
9057 end = ctxt->input->end;
9058 if ((in - start) > maxLength) {
9059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060 "AttValue length too long\n");
9061 return(NULL);
9062 }
9063 }
9064 }
9065 if ((in - start) > maxLength) {
9066 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9067 "AttValue length too long\n");
9068 return(NULL);
9069 }
9070 if (*in != limit) goto need_complex;
9071 } else {
9072 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9073 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9074 in++;
9075 col++;
9076 if (in >= end) {
9077 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9078 if ((in - start) > maxLength) {
9079 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9080 "AttValue length too long\n");
9081 return(NULL);
9082 }
9083 }
9084 }
9085 last = in;
9086 if ((in - start) > maxLength) {
9087 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9088 "AttValue length too long\n");
9089 return(NULL);
9090 }
9091 if (*in != limit) goto need_complex;
9092 }
9093 in++;
9094 col++;
9095 if (len != NULL) {
9096 *len = last - start;
9097 ret = (xmlChar *) start;
9098 } else {
9099 if (alloc) *alloc = 1;
9100 ret = xmlStrndup(start, last - start);
9101 }
9102 CUR_PTR = in;
9103 ctxt->input->line = line;
9104 ctxt->input->col = col;
9105 if (alloc) *alloc = 0;
9106 return ret;
9107 need_complex:
9108 if (alloc) *alloc = 1;
9109 return xmlParseAttValueComplex(ctxt, len, normalize);
9110 }
9111
9112 /**
9113 * xmlParseAttribute2:
9114 * @ctxt: an XML parser context
9115 * @pref: the element prefix
9116 * @elem: the element name
9117 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9118 * @value: a xmlChar ** used to store the value of the attribute
9119 * @len: an int * to save the length of the attribute
9120 * @alloc: an int * to indicate if the attribute was allocated
9121 *
9122 * parse an attribute in the new SAX2 framework.
9123 *
9124 * Returns the attribute name, and the value in *value, .
9125 */
9126
9127 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9128 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9129 const xmlChar * pref, const xmlChar * elem,
9130 const xmlChar ** prefix, xmlChar ** value,
9131 int *len, int *alloc)
9132 {
9133 const xmlChar *name;
9134 xmlChar *val, *internal_val = NULL;
9135 int normalize = 0;
9136
9137 *value = NULL;
9138 GROW;
9139 name = xmlParseQName(ctxt, prefix);
9140 if (name == NULL) {
9141 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9142 "error parsing attribute name\n");
9143 return (NULL);
9144 }
9145
9146 /*
9147 * get the type if needed
9148 */
9149 if (ctxt->attsSpecial != NULL) {
9150 int type;
9151
9152 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9153 pref, elem, *prefix, name);
9154 if (type != 0)
9155 normalize = 1;
9156 }
9157
9158 /*
9159 * read the value
9160 */
9161 SKIP_BLANKS;
9162 if (RAW == '=') {
9163 NEXT;
9164 SKIP_BLANKS;
9165 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9166 if (normalize) {
9167 /*
9168 * Sometimes a second normalisation pass for spaces is needed
9169 * but that only happens if charrefs or entities references
9170 * have been used in the attribute value, i.e. the attribute
9171 * value have been extracted in an allocated string already.
9172 */
9173 if (*alloc) {
9174 const xmlChar *val2;
9175
9176 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9177 if ((val2 != NULL) && (val2 != val)) {
9178 xmlFree(val);
9179 val = (xmlChar *) val2;
9180 }
9181 }
9182 }
9183 ctxt->instate = XML_PARSER_CONTENT;
9184 } else {
9185 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9186 "Specification mandates value for attribute %s\n",
9187 name);
9188 return (NULL);
9189 }
9190
9191 if (*prefix == ctxt->str_xml) {
9192 /*
9193 * Check that xml:lang conforms to the specification
9194 * No more registered as an error, just generate a warning now
9195 * since this was deprecated in XML second edition
9196 */
9197 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9198 internal_val = xmlStrndup(val, *len);
9199 if (!xmlCheckLanguageID(internal_val)) {
9200 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9201 "Malformed value for xml:lang : %s\n",
9202 internal_val, NULL);
9203 }
9204 }
9205
9206 /*
9207 * Check that xml:space conforms to the specification
9208 */
9209 if (xmlStrEqual(name, BAD_CAST "space")) {
9210 internal_val = xmlStrndup(val, *len);
9211 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9212 *(ctxt->space) = 0;
9213 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9214 *(ctxt->space) = 1;
9215 else {
9216 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9217 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9218 internal_val, NULL);
9219 }
9220 }
9221 if (internal_val) {
9222 xmlFree(internal_val);
9223 }
9224 }
9225
9226 *value = val;
9227 return (name);
9228 }
9229 /**
9230 * xmlParseStartTag2:
9231 * @ctxt: an XML parser context
9232 *
 * Parse a start tag, either for rule element or for rule
 * EmptyElement. In both cases the tag closing chars are not parsed.
9235 * This routine is called when running SAX2 parsing
9236 *
9237 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9238 *
9239 * [ WFC: Unique Att Spec ]
9240 * No attribute name may appear more than once in the same start-tag or
9241 * empty-element tag.
9242 *
9243 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9244 *
9245 * [ WFC: Unique Att Spec ]
9246 * No attribute name may appear more than once in the same start-tag or
9247 * empty-element tag.
9248 *
9249 * With namespace:
9250 *
9251 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9252 *
9253 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9254 *
9255 * Returns the element name parsed
9256 */
9257
9258 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9259 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9260 const xmlChar **URI, int *tlen) {
9261 const xmlChar *localname;
9262 const xmlChar *prefix;
9263 const xmlChar *attname;
9264 const xmlChar *aprefix;
9265 const xmlChar *nsname;
9266 xmlChar *attvalue;
9267 const xmlChar **atts = ctxt->atts;
9268 int maxatts = ctxt->maxatts;
9269 int nratts, nbatts, nbdef, inputid;
9270 int i, j, nbNs, attval;
9271 unsigned long cur;
9272 int nsNr = ctxt->nsNr;
9273
9274 if (RAW != '<') return(NULL);
9275 NEXT1;
9276
9277 /*
9278 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9279 * point since the attribute values may be stored as pointers to
9280 * the buffer and calling SHRINK would destroy them !
9281 * The Shrinking is only possible once the full set of attribute
9282 * callbacks have been done.
9283 */
9284 SHRINK;
9285 cur = ctxt->input->cur - ctxt->input->base;
9286 inputid = ctxt->input->id;
9287 nbatts = 0;
9288 nratts = 0;
9289 nbdef = 0;
9290 nbNs = 0;
9291 attval = 0;
9292 /* Forget any namespaces added during an earlier parse of this element. */
9293 ctxt->nsNr = nsNr;
9294
9295 localname = xmlParseQName(ctxt, &prefix);
9296 if (localname == NULL) {
9297 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9298 "StartTag: invalid element name\n");
9299 return(NULL);
9300 }
9301 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9302
9303 /*
9304 * Now parse the attributes, it ends up with the ending
9305 *
9306 * (S Attribute)* S?
9307 */
9308 SKIP_BLANKS;
9309 GROW;
9310
9311 while (((RAW != '>') &&
9312 ((RAW != '/') || (NXT(1) != '>')) &&
9313 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9314 const xmlChar *q = CUR_PTR;
9315 unsigned int cons = ctxt->input->consumed;
9316 int len = -1, alloc = 0;
9317
9318 attname = xmlParseAttribute2(ctxt, prefix, localname,
9319 &aprefix, &attvalue, &len, &alloc);
9320 if ((attname == NULL) || (attvalue == NULL))
9321 goto next_attr;
9322 if (len < 0) len = xmlStrlen(attvalue);
9323
9324 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9325 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9326 xmlURIPtr uri;
9327
9328 if (URL == NULL) {
9329 xmlErrMemory(ctxt, "dictionary allocation failure");
9330 if ((attvalue != NULL) && (alloc != 0))
9331 xmlFree(attvalue);
9332 localname = NULL;
9333 goto done;
9334 }
9335 if (*URL != 0) {
9336 uri = xmlParseURI((const char *) URL);
9337 if (uri == NULL) {
9338 xmlNsErr(ctxt, XML_WAR_NS_URI,
9339 "xmlns: '%s' is not a valid URI\n",
9340 URL, NULL, NULL);
9341 } else {
9342 if (uri->scheme == NULL) {
9343 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9344 "xmlns: URI %s is not absolute\n",
9345 URL, NULL, NULL);
9346 }
9347 xmlFreeURI(uri);
9348 }
9349 if (URL == ctxt->str_xml_ns) {
9350 if (attname != ctxt->str_xml) {
9351 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352 "xml namespace URI cannot be the default namespace\n",
9353 NULL, NULL, NULL);
9354 }
9355 goto next_attr;
9356 }
9357 if ((len == 29) &&
9358 (xmlStrEqual(URL,
9359 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9360 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9361 "reuse of the xmlns namespace name is forbidden\n",
9362 NULL, NULL, NULL);
9363 goto next_attr;
9364 }
9365 }
9366 /*
9367 * check that it's not a defined namespace
9368 */
9369 for (j = 1;j <= nbNs;j++)
9370 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9371 break;
9372 if (j <= nbNs)
9373 xmlErrAttributeDup(ctxt, NULL, attname);
9374 else
9375 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9376
9377 } else if (aprefix == ctxt->str_xmlns) {
9378 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9379 xmlURIPtr uri;
9380
9381 if (attname == ctxt->str_xml) {
9382 if (URL != ctxt->str_xml_ns) {
9383 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9384 "xml namespace prefix mapped to wrong URI\n",
9385 NULL, NULL, NULL);
9386 }
9387 /*
9388 * Do not keep a namespace definition node
9389 */
9390 goto next_attr;
9391 }
9392 if (URL == ctxt->str_xml_ns) {
9393 if (attname != ctxt->str_xml) {
9394 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9395 "xml namespace URI mapped to wrong prefix\n",
9396 NULL, NULL, NULL);
9397 }
9398 goto next_attr;
9399 }
9400 if (attname == ctxt->str_xmlns) {
9401 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9402 "redefinition of the xmlns prefix is forbidden\n",
9403 NULL, NULL, NULL);
9404 goto next_attr;
9405 }
9406 if ((len == 29) &&
9407 (xmlStrEqual(URL,
9408 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9409 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9410 "reuse of the xmlns namespace name is forbidden\n",
9411 NULL, NULL, NULL);
9412 goto next_attr;
9413 }
9414 if ((URL == NULL) || (URL[0] == 0)) {
9415 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9416 "xmlns:%s: Empty XML namespace is not allowed\n",
9417 attname, NULL, NULL);
9418 goto next_attr;
9419 } else {
9420 uri = xmlParseURI((const char *) URL);
9421 if (uri == NULL) {
9422 xmlNsErr(ctxt, XML_WAR_NS_URI,
9423 "xmlns:%s: '%s' is not a valid URI\n",
9424 attname, URL, NULL);
9425 } else {
9426 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9427 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9428 "xmlns:%s: URI %s is not absolute\n",
9429 attname, URL, NULL);
9430 }
9431 xmlFreeURI(uri);
9432 }
9433 }
9434
9435 /*
9436 * check that it's not a defined namespace
9437 */
9438 for (j = 1;j <= nbNs;j++)
9439 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9440 break;
9441 if (j <= nbNs)
9442 xmlErrAttributeDup(ctxt, aprefix, attname);
9443 else
9444 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9445
9446 } else {
9447 /*
9448 * Add the pair to atts
9449 */
9450 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9451 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9452 goto next_attr;
9453 }
9454 maxatts = ctxt->maxatts;
9455 atts = ctxt->atts;
9456 }
9457 ctxt->attallocs[nratts++] = alloc;
9458 atts[nbatts++] = attname;
9459 atts[nbatts++] = aprefix;
9460 /*
9461 * The namespace URI field is used temporarily to point at the
9462 * base of the current input buffer for non-alloced attributes.
9463 * When the input buffer is reallocated, all the pointers become
9464 * invalid, but they can be reconstructed later.
9465 */
9466 if (alloc)
9467 atts[nbatts++] = NULL;
9468 else
9469 atts[nbatts++] = ctxt->input->base;
9470 atts[nbatts++] = attvalue;
9471 attvalue += len;
9472 atts[nbatts++] = attvalue;
9473 /*
9474 * tag if some deallocation is needed
9475 */
9476 if (alloc != 0) attval = 1;
9477 attvalue = NULL; /* moved into atts */
9478 }
9479
9480 next_attr:
9481 if ((attvalue != NULL) && (alloc != 0)) {
9482 xmlFree(attvalue);
9483 attvalue = NULL;
9484 }
9485
9486 GROW
9487 if (ctxt->instate == XML_PARSER_EOF)
9488 break;
9489 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9490 break;
9491 if (SKIP_BLANKS == 0) {
9492 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9493 "attributes construct error\n");
9494 break;
9495 }
9496 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9497 (attname == NULL) && (attvalue == NULL)) {
9498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9499 "xmlParseStartTag: problem parsing attributes\n");
9500 break;
9501 }
9502 GROW;
9503 }
9504
9505 if (ctxt->input->id != inputid) {
9506 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9507 "Unexpected change of input\n");
9508 localname = NULL;
9509 goto done;
9510 }
9511
9512 /* Reconstruct attribute value pointers. */
9513 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514 if (atts[i+2] != NULL) {
9515 /*
9516 * Arithmetic on dangling pointers is technically undefined
9517 * behavior, but well...
9518 */
9519 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9520 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9521 atts[i+3] += offset; /* value */
9522 atts[i+4] += offset; /* valuend */
9523 }
9524 }
9525
9526 /*
9527 * The attributes defaulting
9528 */
9529 if (ctxt->attsDefault != NULL) {
9530 xmlDefAttrsPtr defaults;
9531
9532 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9533 if (defaults != NULL) {
9534 for (i = 0;i < defaults->nbAttrs;i++) {
9535 attname = defaults->values[5 * i];
9536 aprefix = defaults->values[5 * i + 1];
9537
9538 /*
9539 * special work for namespaces defaulted defs
9540 */
9541 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9542 /*
9543 * check that it's not a defined namespace
9544 */
9545 for (j = 1;j <= nbNs;j++)
9546 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9547 break;
9548 if (j <= nbNs) continue;
9549
9550 nsname = xmlGetNamespace(ctxt, NULL);
9551 if (nsname != defaults->values[5 * i + 2]) {
9552 if (nsPush(ctxt, NULL,
9553 defaults->values[5 * i + 2]) > 0)
9554 nbNs++;
9555 }
9556 } else if (aprefix == ctxt->str_xmlns) {
9557 /*
9558 * check that it's not a defined namespace
9559 */
9560 for (j = 1;j <= nbNs;j++)
9561 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9562 break;
9563 if (j <= nbNs) continue;
9564
9565 nsname = xmlGetNamespace(ctxt, attname);
9566 if (nsname != defaults->values[2]) {
9567 if (nsPush(ctxt, attname,
9568 defaults->values[5 * i + 2]) > 0)
9569 nbNs++;
9570 }
9571 } else {
9572 /*
9573 * check that it's not a defined attribute
9574 */
9575 for (j = 0;j < nbatts;j+=5) {
9576 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9577 break;
9578 }
9579 if (j < nbatts) continue;
9580
9581 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9582 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9583 localname = NULL;
9584 goto done;
9585 }
9586 maxatts = ctxt->maxatts;
9587 atts = ctxt->atts;
9588 }
9589 atts[nbatts++] = attname;
9590 atts[nbatts++] = aprefix;
9591 if (aprefix == NULL)
9592 atts[nbatts++] = NULL;
9593 else
9594 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9595 atts[nbatts++] = defaults->values[5 * i + 2];
9596 atts[nbatts++] = defaults->values[5 * i + 3];
9597 if ((ctxt->standalone == 1) &&
9598 (defaults->values[5 * i + 4] != NULL)) {
9599 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9600 "standalone: attribute %s on %s defaulted from external subset\n",
9601 attname, localname);
9602 }
9603 nbdef++;
9604 }
9605 }
9606 }
9607 }
9608
9609 /*
9610 * The attributes checkings
9611 */
9612 for (i = 0; i < nbatts;i += 5) {
9613 /*
9614 * The default namespace does not apply to attribute names.
9615 */
9616 if (atts[i + 1] != NULL) {
9617 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9618 if (nsname == NULL) {
9619 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9620 "Namespace prefix %s for %s on %s is not defined\n",
9621 atts[i + 1], atts[i], localname);
9622 }
9623 atts[i + 2] = nsname;
9624 } else
9625 nsname = NULL;
9626 /*
9627 * [ WFC: Unique Att Spec ]
9628 * No attribute name may appear more than once in the same
9629 * start-tag or empty-element tag.
9630 * As extended by the Namespace in XML REC.
9631 */
9632 for (j = 0; j < i;j += 5) {
9633 if (atts[i] == atts[j]) {
9634 if (atts[i+1] == atts[j+1]) {
9635 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9636 break;
9637 }
9638 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9639 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9640 "Namespaced Attribute %s in '%s' redefined\n",
9641 atts[i], nsname, NULL);
9642 break;
9643 }
9644 }
9645 }
9646 }
9647
9648 nsname = xmlGetNamespace(ctxt, prefix);
9649 if ((prefix != NULL) && (nsname == NULL)) {
9650 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9651 "Namespace prefix %s on %s is not defined\n",
9652 prefix, localname, NULL);
9653 }
9654 *pref = prefix;
9655 *URI = nsname;
9656
9657 /*
9658 * SAX: Start of Element !
9659 */
9660 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9661 (!ctxt->disableSAX)) {
9662 if (nbNs > 0)
9663 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9664 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9665 nbatts / 5, nbdef, atts);
9666 else
9667 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9668 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9669 }
9670
9671 done:
9672 /*
9673 * Free up attribute allocated strings if needed
9674 */
9675 if (attval != 0) {
9676 for (i = 3,j = 0; j < nratts;i += 5,j++)
9677 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9678 xmlFree((xmlChar *) atts[i]);
9679 }
9680
9681 return(localname);
9682 }
9683
9684 /**
9685 * xmlParseEndTag2:
9686 * @ctxt: an XML parser context
9687 * @line: line of the start tag
9688 * @nsNr: number of namespaces on the start tag
9689 *
9690 * parse an end of tag
9691 *
9692 * [42] ETag ::= '</' Name S? '>'
9693 *
9694 * With namespace
9695 *
9696 * [NS 9] ETag ::= '</' QName S? '>'
9697 */
9698
9699 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9700 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9701 const xmlChar *name;
9702
9703 GROW;
9704 if ((RAW != '<') || (NXT(1) != '/')) {
9705 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9706 return;
9707 }
9708 SKIP(2);
9709
9710 if (tag->prefix == NULL)
9711 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9712 else
9713 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9714
9715 /*
9716 * We should definitely be at the ending "S? '>'" part
9717 */
9718 GROW;
9719 if (ctxt->instate == XML_PARSER_EOF)
9720 return;
9721 SKIP_BLANKS;
9722 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9723 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9724 } else
9725 NEXT1;
9726
9727 /*
9728 * [ WFC: Element Type Match ]
9729 * The Name in an element's end-tag must match the element type in the
9730 * start-tag.
9731 *
9732 */
9733 if (name != (xmlChar*)1) {
9734 if (name == NULL) name = BAD_CAST "unparsable";
9735 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9736 "Opening and ending tag mismatch: %s line %d and %s\n",
9737 ctxt->name, tag->line, name);
9738 }
9739
9740 /*
9741 * SAX: End of Tag
9742 */
9743 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9744 (!ctxt->disableSAX))
9745 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9746 tag->URI);
9747
9748 spacePop(ctxt);
9749 if (tag->nsNr != 0)
9750 nsPop(ctxt, tag->nsNr);
9751 }
9752
9753 /**
9754 * xmlParseCDSect:
9755 * @ctxt: an XML parser context
9756 *
9757 * Parse escaped pure raw content.
9758 *
9759 * [18] CDSect ::= CDStart CData CDEnd
9760 *
9761 * [19] CDStart ::= '<![CDATA['
9762 *
9763 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9764 *
9765 * [21] CDEnd ::= ']]>'
9766 */
9767 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9768 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9769 xmlChar *buf = NULL;
9770 int len = 0;
9771 int size = XML_PARSER_BUFFER_SIZE;
9772 int r, rl;
9773 int s, sl;
9774 int cur, l;
9775 int count = 0;
9776 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9777 XML_MAX_HUGE_LENGTH :
9778 XML_MAX_TEXT_LENGTH;
9779
9780 /* Check 2.6.0 was NXT(0) not RAW */
9781 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9782 SKIP(9);
9783 } else
9784 return;
9785
9786 ctxt->instate = XML_PARSER_CDATA_SECTION;
9787 r = CUR_CHAR(rl);
9788 if (!IS_CHAR(r)) {
9789 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9790 ctxt->instate = XML_PARSER_CONTENT;
9791 return;
9792 }
9793 NEXTL(rl);
9794 s = CUR_CHAR(sl);
9795 if (!IS_CHAR(s)) {
9796 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9797 ctxt->instate = XML_PARSER_CONTENT;
9798 return;
9799 }
9800 NEXTL(sl);
9801 cur = CUR_CHAR(l);
9802 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9803 if (buf == NULL) {
9804 xmlErrMemory(ctxt, NULL);
9805 return;
9806 }
9807 while (IS_CHAR(cur) &&
9808 ((r != ']') || (s != ']') || (cur != '>'))) {
9809 if (len + 5 >= size) {
9810 xmlChar *tmp;
9811
9812 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9813 if (tmp == NULL) {
9814 xmlFree(buf);
9815 xmlErrMemory(ctxt, NULL);
9816 return;
9817 }
9818 buf = tmp;
9819 size *= 2;
9820 }
9821 COPY_BUF(rl,buf,len,r);
9822 r = s;
9823 rl = sl;
9824 s = cur;
9825 sl = l;
9826 count++;
9827 if (count > 50) {
9828 SHRINK;
9829 GROW;
9830 if (ctxt->instate == XML_PARSER_EOF) {
9831 xmlFree(buf);
9832 return;
9833 }
9834 count = 0;
9835 }
9836 NEXTL(l);
9837 cur = CUR_CHAR(l);
9838 if (len > maxLength) {
9839 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9840 "CData section too big found\n");
9841 xmlFree(buf);
9842 return;
9843 }
9844 }
9845 buf[len] = 0;
9846 ctxt->instate = XML_PARSER_CONTENT;
9847 if (cur != '>') {
9848 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9849 "CData section not finished\n%.50s\n", buf);
9850 xmlFree(buf);
9851 return;
9852 }
9853 NEXTL(l);
9854
9855 /*
9856 * OK the buffer is to be consumed as cdata.
9857 */
9858 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9859 if (ctxt->sax->cdataBlock != NULL)
9860 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9861 else if (ctxt->sax->characters != NULL)
9862 ctxt->sax->characters(ctxt->userData, buf, len);
9863 }
9864 xmlFree(buf);
9865 }
9866
9867 /**
9868 * xmlParseContentInternal:
9869 * @ctxt: an XML parser context
9870 *
9871 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9872 * unexpected EOF to the caller.
9873 */
9874
9875 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9876 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9877 int nameNr = ctxt->nameNr;
9878
9879 GROW;
9880 while ((RAW != 0) &&
9881 (ctxt->instate != XML_PARSER_EOF)) {
9882 const xmlChar *test = CUR_PTR;
9883 unsigned int cons = ctxt->input->consumed;
9884 const xmlChar *cur = ctxt->input->cur;
9885
9886 /*
9887 * First case : a Processing Instruction.
9888 */
9889 if ((*cur == '<') && (cur[1] == '?')) {
9890 xmlParsePI(ctxt);
9891 }
9892
9893 /*
9894 * Second case : a CDSection
9895 */
9896 /* 2.6.0 test was *cur not RAW */
9897 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9898 xmlParseCDSect(ctxt);
9899 }
9900
9901 /*
9902 * Third case : a comment
9903 */
9904 else if ((*cur == '<') && (NXT(1) == '!') &&
9905 (NXT(2) == '-') && (NXT(3) == '-')) {
9906 xmlParseComment(ctxt);
9907 ctxt->instate = XML_PARSER_CONTENT;
9908 }
9909
9910 /*
9911 * Fourth case : a sub-element.
9912 */
9913 else if (*cur == '<') {
9914 if (NXT(1) == '/') {
9915 if (ctxt->nameNr <= nameNr)
9916 break;
9917 xmlParseElementEnd(ctxt);
9918 } else {
9919 xmlParseElementStart(ctxt);
9920 }
9921 }
9922
9923 /*
9924 * Fifth case : a reference. If if has not been resolved,
9925 * parsing returns it's Name, create the node
9926 */
9927
9928 else if (*cur == '&') {
9929 xmlParseReference(ctxt);
9930 }
9931
9932 /*
9933 * Last case, text. Note that References are handled directly.
9934 */
9935 else {
9936 xmlParseCharData(ctxt, 0);
9937 }
9938
9939 GROW;
9940 SHRINK;
9941
9942 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9943 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9944 "detected an error in element content\n");
9945 xmlHaltParser(ctxt);
9946 break;
9947 }
9948 }
9949 }
9950
9951 /**
9952 * xmlParseContent:
9953 * @ctxt: an XML parser context
9954 *
9955 * Parse a content sequence. Stops at EOF or '</'.
9956 *
9957 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9958 */
9959
9960 void
xmlParseContent(xmlParserCtxtPtr ctxt)9961 xmlParseContent(xmlParserCtxtPtr ctxt) {
9962 int nameNr = ctxt->nameNr;
9963
9964 xmlParseContentInternal(ctxt);
9965
9966 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9967 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9968 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9969 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9970 "Premature end of data in tag %s line %d\n",
9971 name, line, NULL);
9972 }
9973 }
9974
9975 /**
9976 * xmlParseElement:
9977 * @ctxt: an XML parser context
9978 *
9979 * parse an XML element
9980 *
9981 * [39] element ::= EmptyElemTag | STag content ETag
9982 *
9983 * [ WFC: Element Type Match ]
9984 * The Name in an element's end-tag must match the element type in the
9985 * start-tag.
9986 *
9987 */
9988
9989 void
xmlParseElement(xmlParserCtxtPtr ctxt)9990 xmlParseElement(xmlParserCtxtPtr ctxt) {
9991 if (xmlParseElementStart(ctxt) != 0)
9992 return;
9993
9994 xmlParseContentInternal(ctxt);
9995 if (ctxt->instate == XML_PARSER_EOF)
9996 return;
9997
9998 if (CUR == 0) {
9999 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10000 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10001 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10002 "Premature end of data in tag %s line %d\n",
10003 name, line, NULL);
10004 return;
10005 }
10006
10007 xmlParseElementEnd(ctxt);
10008 }
10009
10010 /**
10011 * xmlParseElementStart:
10012 * @ctxt: an XML parser context
10013 *
10014 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10015 * opening tag was parsed, 1 if an empty element was parsed.
10016 */
10017 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10018 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10019 const xmlChar *name;
10020 const xmlChar *prefix = NULL;
10021 const xmlChar *URI = NULL;
10022 xmlParserNodeInfo node_info;
10023 int line, tlen = 0;
10024 xmlNodePtr ret;
10025 int nsNr = ctxt->nsNr;
10026
10027 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10028 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10029 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10030 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10031 xmlParserMaxDepth);
10032 xmlHaltParser(ctxt);
10033 return(-1);
10034 }
10035
10036 /* Capture start position */
10037 if (ctxt->record_info) {
10038 node_info.begin_pos = ctxt->input->consumed +
10039 (CUR_PTR - ctxt->input->base);
10040 node_info.begin_line = ctxt->input->line;
10041 }
10042
10043 if (ctxt->spaceNr == 0)
10044 spacePush(ctxt, -1);
10045 else if (*ctxt->space == -2)
10046 spacePush(ctxt, -1);
10047 else
10048 spacePush(ctxt, *ctxt->space);
10049
10050 line = ctxt->input->line;
10051 #ifdef LIBXML_SAX1_ENABLED
10052 if (ctxt->sax2)
10053 #endif /* LIBXML_SAX1_ENABLED */
10054 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10055 #ifdef LIBXML_SAX1_ENABLED
10056 else
10057 name = xmlParseStartTag(ctxt);
10058 #endif /* LIBXML_SAX1_ENABLED */
10059 if (ctxt->instate == XML_PARSER_EOF)
10060 return(-1);
10061 if (name == NULL) {
10062 spacePop(ctxt);
10063 return(-1);
10064 }
10065 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10066 ret = ctxt->node;
10067
10068 #ifdef LIBXML_VALID_ENABLED
10069 /*
10070 * [ VC: Root Element Type ]
10071 * The Name in the document type declaration must match the element
10072 * type of the root element.
10073 */
10074 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10075 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10076 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10077 #endif /* LIBXML_VALID_ENABLED */
10078
10079 /*
10080 * Check for an Empty Element.
10081 */
10082 if ((RAW == '/') && (NXT(1) == '>')) {
10083 SKIP(2);
10084 if (ctxt->sax2) {
10085 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10086 (!ctxt->disableSAX))
10087 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10088 #ifdef LIBXML_SAX1_ENABLED
10089 } else {
10090 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10091 (!ctxt->disableSAX))
10092 ctxt->sax->endElement(ctxt->userData, name);
10093 #endif /* LIBXML_SAX1_ENABLED */
10094 }
10095 namePop(ctxt);
10096 spacePop(ctxt);
10097 if (nsNr != ctxt->nsNr)
10098 nsPop(ctxt, ctxt->nsNr - nsNr);
10099 if ( ret != NULL && ctxt->record_info ) {
10100 node_info.end_pos = ctxt->input->consumed +
10101 (CUR_PTR - ctxt->input->base);
10102 node_info.end_line = ctxt->input->line;
10103 node_info.node = ret;
10104 xmlParserAddNodeInfo(ctxt, &node_info);
10105 }
10106 return(1);
10107 }
10108 if (RAW == '>') {
10109 NEXT1;
10110 } else {
10111 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10112 "Couldn't find end of Start Tag %s line %d\n",
10113 name, line, NULL);
10114
10115 /*
10116 * end of parsing of this node.
10117 */
10118 nodePop(ctxt);
10119 namePop(ctxt);
10120 spacePop(ctxt);
10121 if (nsNr != ctxt->nsNr)
10122 nsPop(ctxt, ctxt->nsNr - nsNr);
10123
10124 /*
10125 * Capture end position and add node
10126 */
10127 if ( ret != NULL && ctxt->record_info ) {
10128 node_info.end_pos = ctxt->input->consumed +
10129 (CUR_PTR - ctxt->input->base);
10130 node_info.end_line = ctxt->input->line;
10131 node_info.node = ret;
10132 xmlParserAddNodeInfo(ctxt, &node_info);
10133 }
10134 return(-1);
10135 }
10136
10137 return(0);
10138 }
10139
10140 /**
10141 * xmlParseElementEnd:
10142 * @ctxt: an XML parser context
10143 *
10144 * Parse the end of an XML element.
10145 */
10146 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10147 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10148 xmlParserNodeInfo node_info;
10149 xmlNodePtr ret = ctxt->node;
10150
10151 if (ctxt->nameNr <= 0)
10152 return;
10153
10154 /*
10155 * parse the end of tag: '</' should be here.
10156 */
10157 if (ctxt->sax2) {
10158 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10159 namePop(ctxt);
10160 }
10161 #ifdef LIBXML_SAX1_ENABLED
10162 else
10163 xmlParseEndTag1(ctxt, 0);
10164 #endif /* LIBXML_SAX1_ENABLED */
10165
10166 /*
10167 * Capture end position and add node
10168 */
10169 if ( ret != NULL && ctxt->record_info ) {
10170 node_info.end_pos = ctxt->input->consumed +
10171 (CUR_PTR - ctxt->input->base);
10172 node_info.end_line = ctxt->input->line;
10173 node_info.node = ret;
10174 xmlParserAddNodeInfo(ctxt, &node_info);
10175 }
10176 }
10177
10178 /**
10179 * xmlParseVersionNum:
10180 * @ctxt: an XML parser context
10181 *
10182 * parse the XML version value.
10183 *
10184 * [26] VersionNum ::= '1.' [0-9]+
10185 *
10186 * In practice allow [0-9].[0-9]+ at that level
10187 *
10188 * Returns the string giving the XML version number, or NULL
10189 */
10190 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10191 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10192 xmlChar *buf = NULL;
10193 int len = 0;
10194 int size = 10;
10195 xmlChar cur;
10196
10197 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10198 if (buf == NULL) {
10199 xmlErrMemory(ctxt, NULL);
10200 return(NULL);
10201 }
10202 cur = CUR;
10203 if (!((cur >= '0') && (cur <= '9'))) {
10204 xmlFree(buf);
10205 return(NULL);
10206 }
10207 buf[len++] = cur;
10208 NEXT;
10209 cur=CUR;
10210 if (cur != '.') {
10211 xmlFree(buf);
10212 return(NULL);
10213 }
10214 buf[len++] = cur;
10215 NEXT;
10216 cur=CUR;
10217 while ((cur >= '0') && (cur <= '9')) {
10218 if (len + 1 >= size) {
10219 xmlChar *tmp;
10220
10221 size *= 2;
10222 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10223 if (tmp == NULL) {
10224 xmlFree(buf);
10225 xmlErrMemory(ctxt, NULL);
10226 return(NULL);
10227 }
10228 buf = tmp;
10229 }
10230 buf[len++] = cur;
10231 NEXT;
10232 cur=CUR;
10233 }
10234 buf[len] = 0;
10235 return(buf);
10236 }
10237
10238 /**
10239 * xmlParseVersionInfo:
10240 * @ctxt: an XML parser context
10241 *
10242 * parse the XML version.
10243 *
10244 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10245 *
10246 * [25] Eq ::= S? '=' S?
10247 *
10248 * Returns the version string, e.g. "1.0"
10249 */
10250
10251 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10252 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10253 xmlChar *version = NULL;
10254
10255 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10256 SKIP(7);
10257 SKIP_BLANKS;
10258 if (RAW != '=') {
10259 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10260 return(NULL);
10261 }
10262 NEXT;
10263 SKIP_BLANKS;
10264 if (RAW == '"') {
10265 NEXT;
10266 version = xmlParseVersionNum(ctxt);
10267 if (RAW != '"') {
10268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10269 } else
10270 NEXT;
10271 } else if (RAW == '\''){
10272 NEXT;
10273 version = xmlParseVersionNum(ctxt);
10274 if (RAW != '\'') {
10275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10276 } else
10277 NEXT;
10278 } else {
10279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10280 }
10281 }
10282 return(version);
10283 }
10284
10285 /**
10286 * xmlParseEncName:
10287 * @ctxt: an XML parser context
10288 *
10289 * parse the XML encoding name
10290 *
10291 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10292 *
10293 * Returns the encoding name value or NULL
10294 */
10295 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10296 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10297 xmlChar *buf = NULL;
10298 int len = 0;
10299 int size = 10;
10300 xmlChar cur;
10301
10302 cur = CUR;
10303 if (((cur >= 'a') && (cur <= 'z')) ||
10304 ((cur >= 'A') && (cur <= 'Z'))) {
10305 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10306 if (buf == NULL) {
10307 xmlErrMemory(ctxt, NULL);
10308 return(NULL);
10309 }
10310
10311 buf[len++] = cur;
10312 NEXT;
10313 cur = CUR;
10314 while (((cur >= 'a') && (cur <= 'z')) ||
10315 ((cur >= 'A') && (cur <= 'Z')) ||
10316 ((cur >= '0') && (cur <= '9')) ||
10317 (cur == '.') || (cur == '_') ||
10318 (cur == '-')) {
10319 if (len + 1 >= size) {
10320 xmlChar *tmp;
10321
10322 size *= 2;
10323 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10324 if (tmp == NULL) {
10325 xmlErrMemory(ctxt, NULL);
10326 xmlFree(buf);
10327 return(NULL);
10328 }
10329 buf = tmp;
10330 }
10331 buf[len++] = cur;
10332 NEXT;
10333 cur = CUR;
10334 if (cur == 0) {
10335 SHRINK;
10336 GROW;
10337 cur = CUR;
10338 }
10339 }
10340 buf[len] = 0;
10341 } else {
10342 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10343 }
10344 return(buf);
10345 }
10346
10347 /**
10348 * xmlParseEncodingDecl:
10349 * @ctxt: an XML parser context
10350 *
10351 * parse the XML encoding declaration
10352 *
10353 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10354 *
10355 * this setups the conversion filters.
10356 *
10357 * Returns the encoding value or NULL
10358 */
10359
10360 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10361 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10362 xmlChar *encoding = NULL;
10363
10364 SKIP_BLANKS;
10365 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10366 SKIP(8);
10367 SKIP_BLANKS;
10368 if (RAW != '=') {
10369 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10370 return(NULL);
10371 }
10372 NEXT;
10373 SKIP_BLANKS;
10374 if (RAW == '"') {
10375 NEXT;
10376 encoding = xmlParseEncName(ctxt);
10377 if (RAW != '"') {
10378 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10379 xmlFree((xmlChar *) encoding);
10380 return(NULL);
10381 } else
10382 NEXT;
10383 } else if (RAW == '\''){
10384 NEXT;
10385 encoding = xmlParseEncName(ctxt);
10386 if (RAW != '\'') {
10387 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10388 xmlFree((xmlChar *) encoding);
10389 return(NULL);
10390 } else
10391 NEXT;
10392 } else {
10393 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10394 }
10395
10396 /*
10397 * Non standard parsing, allowing the user to ignore encoding
10398 */
10399 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10400 xmlFree((xmlChar *) encoding);
10401 return(NULL);
10402 }
10403
10404 /*
10405 * UTF-16 encoding switch has already taken place at this stage,
10406 * more over the little-endian/big-endian selection is already done
10407 */
10408 if ((encoding != NULL) &&
10409 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10410 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10411 /*
10412 * If no encoding was passed to the parser, that we are
10413 * using UTF-16 and no decoder is present i.e. the
10414 * document is apparently UTF-8 compatible, then raise an
10415 * encoding mismatch fatal error
10416 */
10417 if ((ctxt->encoding == NULL) &&
10418 (ctxt->input->buf != NULL) &&
10419 (ctxt->input->buf->encoder == NULL)) {
10420 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10421 "Document labelled UTF-16 but has UTF-8 content\n");
10422 }
10423 if (ctxt->encoding != NULL)
10424 xmlFree((xmlChar *) ctxt->encoding);
10425 ctxt->encoding = encoding;
10426 }
10427 /*
10428 * UTF-8 encoding is handled natively
10429 */
10430 else if ((encoding != NULL) &&
10431 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10432 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10433 if (ctxt->encoding != NULL)
10434 xmlFree((xmlChar *) ctxt->encoding);
10435 ctxt->encoding = encoding;
10436 }
10437 else if (encoding != NULL) {
10438 xmlCharEncodingHandlerPtr handler;
10439
10440 if (ctxt->input->encoding != NULL)
10441 xmlFree((xmlChar *) ctxt->input->encoding);
10442 ctxt->input->encoding = encoding;
10443
10444 handler = xmlFindCharEncodingHandler((const char *) encoding);
10445 if (handler != NULL) {
10446 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10447 /* failed to convert */
10448 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10449 return(NULL);
10450 }
10451 } else {
10452 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10453 "Unsupported encoding %s\n", encoding);
10454 return(NULL);
10455 }
10456 }
10457 }
10458 return(encoding);
10459 }
10460
10461 /**
10462 * xmlParseSDDecl:
10463 * @ctxt: an XML parser context
10464 *
10465 * parse the XML standalone declaration
10466 *
10467 * [32] SDDecl ::= S 'standalone' Eq
10468 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10469 *
10470 * [ VC: Standalone Document Declaration ]
10471 * TODO The standalone document declaration must have the value "no"
10472 * if any external markup declarations contain declarations of:
10473 * - attributes with default values, if elements to which these
10474 * attributes apply appear in the document without specifications
10475 * of values for these attributes, or
10476 * - entities (other than amp, lt, gt, apos, quot), if references
10477 * to those entities appear in the document, or
10478 * - attributes with values subject to normalization, where the
10479 * attribute appears in the document with a value which will change
10480 * as a result of normalization, or
10481 * - element types with element content, if white space occurs directly
10482 * within any instance of those types.
10483 *
10484 * Returns:
10485 * 1 if standalone="yes"
10486 * 0 if standalone="no"
10487 * -2 if standalone attribute is missing or invalid
10488 * (A standalone value of -2 means that the XML declaration was found,
10489 * but no value was specified for the standalone attribute).
10490 */
10491
10492 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10493 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10494 int standalone = -2;
10495
10496 SKIP_BLANKS;
10497 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10498 SKIP(10);
10499 SKIP_BLANKS;
10500 if (RAW != '=') {
10501 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10502 return(standalone);
10503 }
10504 NEXT;
10505 SKIP_BLANKS;
10506 if (RAW == '\''){
10507 NEXT;
10508 if ((RAW == 'n') && (NXT(1) == 'o')) {
10509 standalone = 0;
10510 SKIP(2);
10511 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10512 (NXT(2) == 's')) {
10513 standalone = 1;
10514 SKIP(3);
10515 } else {
10516 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10517 }
10518 if (RAW != '\'') {
10519 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10520 } else
10521 NEXT;
10522 } else if (RAW == '"'){
10523 NEXT;
10524 if ((RAW == 'n') && (NXT(1) == 'o')) {
10525 standalone = 0;
10526 SKIP(2);
10527 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10528 (NXT(2) == 's')) {
10529 standalone = 1;
10530 SKIP(3);
10531 } else {
10532 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10533 }
10534 if (RAW != '"') {
10535 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10536 } else
10537 NEXT;
10538 } else {
10539 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10540 }
10541 }
10542 return(standalone);
10543 }
10544
10545 /**
10546 * xmlParseXMLDecl:
10547 * @ctxt: an XML parser context
10548 *
10549 * parse an XML declaration header
10550 *
10551 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10552 */
10553
10554 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10555 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10556 xmlChar *version;
10557
10558 /*
10559 * This value for standalone indicates that the document has an
10560 * XML declaration but it does not have a standalone attribute.
10561 * It will be overwritten later if a standalone attribute is found.
10562 */
10563 ctxt->input->standalone = -2;
10564
10565 /*
10566 * We know that '<?xml' is here.
10567 */
10568 SKIP(5);
10569
10570 if (!IS_BLANK_CH(RAW)) {
10571 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10572 "Blank needed after '<?xml'\n");
10573 }
10574 SKIP_BLANKS;
10575
10576 /*
10577 * We must have the VersionInfo here.
10578 */
10579 version = xmlParseVersionInfo(ctxt);
10580 if (version == NULL) {
10581 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10582 } else {
10583 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10584 /*
10585 * Changed here for XML-1.0 5th edition
10586 */
10587 if (ctxt->options & XML_PARSE_OLD10) {
10588 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10589 "Unsupported version '%s'\n",
10590 version);
10591 } else {
10592 if ((version[0] == '1') && ((version[1] == '.'))) {
10593 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10594 "Unsupported version '%s'\n",
10595 version, NULL);
10596 } else {
10597 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10598 "Unsupported version '%s'\n",
10599 version);
10600 }
10601 }
10602 }
10603 if (ctxt->version != NULL)
10604 xmlFree((void *) ctxt->version);
10605 ctxt->version = version;
10606 }
10607
10608 /*
10609 * We may have the encoding declaration
10610 */
10611 if (!IS_BLANK_CH(RAW)) {
10612 if ((RAW == '?') && (NXT(1) == '>')) {
10613 SKIP(2);
10614 return;
10615 }
10616 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10617 }
10618 xmlParseEncodingDecl(ctxt);
10619 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10620 (ctxt->instate == XML_PARSER_EOF)) {
10621 /*
10622 * The XML REC instructs us to stop parsing right here
10623 */
10624 return;
10625 }
10626
10627 /*
10628 * We may have the standalone status.
10629 */
10630 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10631 if ((RAW == '?') && (NXT(1) == '>')) {
10632 SKIP(2);
10633 return;
10634 }
10635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10636 }
10637
10638 /*
10639 * We can grow the input buffer freely at that point
10640 */
10641 GROW;
10642
10643 SKIP_BLANKS;
10644 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10645
10646 SKIP_BLANKS;
10647 if ((RAW == '?') && (NXT(1) == '>')) {
10648 SKIP(2);
10649 } else if (RAW == '>') {
10650 /* Deprecated old WD ... */
10651 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652 NEXT;
10653 } else {
10654 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10655 MOVETO_ENDTAG(CUR_PTR);
10656 NEXT;
10657 }
10658 }
10659
10660 /**
10661 * xmlParseMisc:
10662 * @ctxt: an XML parser context
10663 *
10664 * parse an XML Misc* optional field.
10665 *
10666 * [27] Misc ::= Comment | PI | S
10667 */
10668
10669 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10670 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10671 while (ctxt->instate != XML_PARSER_EOF) {
10672 SKIP_BLANKS;
10673 GROW;
10674 if ((RAW == '<') && (NXT(1) == '?')) {
10675 xmlParsePI(ctxt);
10676 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10677 xmlParseComment(ctxt);
10678 } else {
10679 break;
10680 }
10681 }
10682 }
10683
10684 /**
10685 * xmlParseDocument:
10686 * @ctxt: an XML parser context
10687 *
10688 * parse an XML document (and build a tree if using the standard SAX
10689 * interface).
10690 *
10691 * [1] document ::= prolog element Misc*
10692 *
10693 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10694 *
10695 * Returns 0, -1 in case of error. the parser context is augmented
10696 * as a result of the parsing.
10697 */
10698
10699 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10700 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10701 xmlChar start[4];
10702 xmlCharEncoding enc;
10703
10704 xmlInitParser();
10705
10706 if ((ctxt == NULL) || (ctxt->input == NULL))
10707 return(-1);
10708
10709 GROW;
10710
10711 /*
10712 * SAX: detecting the level.
10713 */
10714 xmlDetectSAX2(ctxt);
10715
10716 /*
10717 * SAX: beginning of the document processing.
10718 */
10719 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10720 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10721 if (ctxt->instate == XML_PARSER_EOF)
10722 return(-1);
10723
10724 if ((ctxt->encoding == NULL) &&
10725 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10726 /*
10727 * Get the 4 first bytes and decode the charset
10728 * if enc != XML_CHAR_ENCODING_NONE
10729 * plug some encoding conversion routines.
10730 */
10731 start[0] = RAW;
10732 start[1] = NXT(1);
10733 start[2] = NXT(2);
10734 start[3] = NXT(3);
10735 enc = xmlDetectCharEncoding(&start[0], 4);
10736 if (enc != XML_CHAR_ENCODING_NONE) {
10737 xmlSwitchEncoding(ctxt, enc);
10738 }
10739 }
10740
10741
10742 if (CUR == 0) {
10743 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10744 return(-1);
10745 }
10746
10747 /*
10748 * Check for the XMLDecl in the Prolog.
10749 * do not GROW here to avoid the detected encoder to decode more
10750 * than just the first line, unless the amount of data is really
10751 * too small to hold "<?xml version="1.0" encoding="foo"
10752 */
10753 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10754 GROW;
10755 }
10756 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10757
10758 /*
10759 * Note that we will switch encoding on the fly.
10760 */
10761 xmlParseXMLDecl(ctxt);
10762 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10763 (ctxt->instate == XML_PARSER_EOF)) {
10764 /*
10765 * The XML REC instructs us to stop parsing right here
10766 */
10767 return(-1);
10768 }
10769 ctxt->standalone = ctxt->input->standalone;
10770 SKIP_BLANKS;
10771 } else {
10772 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10773 }
10774 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10775 ctxt->sax->startDocument(ctxt->userData);
10776 if (ctxt->instate == XML_PARSER_EOF)
10777 return(-1);
10778 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10779 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10780 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10781 }
10782
10783 /*
10784 * The Misc part of the Prolog
10785 */
10786 xmlParseMisc(ctxt);
10787
10788 /*
10789 * Then possibly doc type declaration(s) and more Misc
10790 * (doctypedecl Misc*)?
10791 */
10792 GROW;
10793 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10794
10795 ctxt->inSubset = 1;
10796 xmlParseDocTypeDecl(ctxt);
10797 if (RAW == '[') {
10798 ctxt->instate = XML_PARSER_DTD;
10799 xmlParseInternalSubset(ctxt);
10800 if (ctxt->instate == XML_PARSER_EOF)
10801 return(-1);
10802 }
10803
10804 /*
10805 * Create and update the external subset.
10806 */
10807 ctxt->inSubset = 2;
10808 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10809 (!ctxt->disableSAX))
10810 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10811 ctxt->extSubSystem, ctxt->extSubURI);
10812 if (ctxt->instate == XML_PARSER_EOF)
10813 return(-1);
10814 ctxt->inSubset = 0;
10815
10816 xmlCleanSpecialAttr(ctxt);
10817
10818 ctxt->instate = XML_PARSER_PROLOG;
10819 xmlParseMisc(ctxt);
10820 }
10821
10822 /*
10823 * Time to start parsing the tree itself
10824 */
10825 GROW;
10826 if (RAW != '<') {
10827 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10828 "Start tag expected, '<' not found\n");
10829 } else {
10830 ctxt->instate = XML_PARSER_CONTENT;
10831 xmlParseElement(ctxt);
10832 ctxt->instate = XML_PARSER_EPILOG;
10833
10834
10835 /*
10836 * The Misc part at the end
10837 */
10838 xmlParseMisc(ctxt);
10839
10840 if (RAW != 0) {
10841 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10842 }
10843 ctxt->instate = XML_PARSER_EOF;
10844 }
10845
10846 /*
10847 * SAX: end of the document processing.
10848 */
10849 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10850 ctxt->sax->endDocument(ctxt->userData);
10851
10852 /*
10853 * Remove locally kept entity definitions if the tree was not built
10854 */
10855 if ((ctxt->myDoc != NULL) &&
10856 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10857 xmlFreeDoc(ctxt->myDoc);
10858 ctxt->myDoc = NULL;
10859 }
10860
10861 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10862 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10863 if (ctxt->valid)
10864 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10865 if (ctxt->nsWellFormed)
10866 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10867 if (ctxt->options & XML_PARSE_OLD10)
10868 ctxt->myDoc->properties |= XML_DOC_OLD10;
10869 }
10870 if (! ctxt->wellFormed) {
10871 ctxt->valid = 0;
10872 return(-1);
10873 }
10874 return(0);
10875 }
10876
10877 /**
10878 * xmlParseExtParsedEnt:
10879 * @ctxt: an XML parser context
10880 *
10881 * parse a general parsed entity
10882 * An external general parsed entity is well-formed if it matches the
10883 * production labeled extParsedEnt.
10884 *
10885 * [78] extParsedEnt ::= TextDecl? content
10886 *
10887 * Returns 0, -1 in case of error. the parser context is augmented
10888 * as a result of the parsing.
10889 */
10890
10891 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10892 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10893 xmlChar start[4];
10894 xmlCharEncoding enc;
10895
10896 if ((ctxt == NULL) || (ctxt->input == NULL))
10897 return(-1);
10898
10899 xmlDefaultSAXHandlerInit();
10900
10901 xmlDetectSAX2(ctxt);
10902
10903 GROW;
10904
10905 /*
10906 * SAX: beginning of the document processing.
10907 */
10908 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10909 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10910
10911 /*
10912 * Get the 4 first bytes and decode the charset
10913 * if enc != XML_CHAR_ENCODING_NONE
10914 * plug some encoding conversion routines.
10915 */
10916 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10917 start[0] = RAW;
10918 start[1] = NXT(1);
10919 start[2] = NXT(2);
10920 start[3] = NXT(3);
10921 enc = xmlDetectCharEncoding(start, 4);
10922 if (enc != XML_CHAR_ENCODING_NONE) {
10923 xmlSwitchEncoding(ctxt, enc);
10924 }
10925 }
10926
10927
10928 if (CUR == 0) {
10929 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10930 }
10931
10932 /*
10933 * Check for the XMLDecl in the Prolog.
10934 */
10935 GROW;
10936 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10937
10938 /*
10939 * Note that we will switch encoding on the fly.
10940 */
10941 xmlParseXMLDecl(ctxt);
10942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10943 /*
10944 * The XML REC instructs us to stop parsing right here
10945 */
10946 return(-1);
10947 }
10948 SKIP_BLANKS;
10949 } else {
10950 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10951 }
10952 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10953 ctxt->sax->startDocument(ctxt->userData);
10954 if (ctxt->instate == XML_PARSER_EOF)
10955 return(-1);
10956
10957 /*
10958 * Doing validity checking on chunk doesn't make sense
10959 */
10960 ctxt->instate = XML_PARSER_CONTENT;
10961 ctxt->validate = 0;
10962 ctxt->loadsubset = 0;
10963 ctxt->depth = 0;
10964
10965 xmlParseContent(ctxt);
10966 if (ctxt->instate == XML_PARSER_EOF)
10967 return(-1);
10968
10969 if ((RAW == '<') && (NXT(1) == '/')) {
10970 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10971 } else if (RAW != 0) {
10972 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10973 }
10974
10975 /*
10976 * SAX: end of the document processing.
10977 */
10978 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10979 ctxt->sax->endDocument(ctxt->userData);
10980
10981 if (! ctxt->wellFormed) return(-1);
10982 return(0);
10983 }
10984
10985 #ifdef LIBXML_PUSH_ENABLED
10986 /************************************************************************
10987 * *
10988 * Progressive parsing interfaces *
10989 * *
10990 ************************************************************************/
10991
10992 /**
10993 * xmlParseLookupSequence:
10994 * @ctxt: an XML parser context
10995 * @first: the first char to lookup
10996 * @next: the next char to lookup or zero
10997 * @third: the next char to lookup or zero
10998 *
10999 * Try to find if a sequence (first, next, third) or just (first next) or
11000 * (first) is available in the input stream.
11001 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11002 * to avoid rescanning sequences of bytes, it DOES change the state of the
11003 * parser, do not use liberally.
11004 *
11005 * Returns the index to the current parsing point if the full sequence
11006 * is available, -1 otherwise.
11007 */
11008 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)11009 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11010 xmlChar next, xmlChar third) {
11011 int base, len;
11012 xmlParserInputPtr in;
11013 const xmlChar *buf;
11014
11015 in = ctxt->input;
11016 if (in == NULL) return(-1);
11017 base = in->cur - in->base;
11018 if (base < 0) return(-1);
11019 if (ctxt->checkIndex > base)
11020 base = ctxt->checkIndex;
11021 if (in->buf == NULL) {
11022 buf = in->base;
11023 len = in->length;
11024 } else {
11025 buf = xmlBufContent(in->buf->buffer);
11026 len = xmlBufUse(in->buf->buffer);
11027 }
11028 /* take into account the sequence length */
11029 if (third) len -= 2;
11030 else if (next) len --;
11031 for (;base < len;base++) {
11032 if (buf[base] == first) {
11033 if (third != 0) {
11034 if ((buf[base + 1] != next) ||
11035 (buf[base + 2] != third)) continue;
11036 } else if (next != 0) {
11037 if (buf[base + 1] != next) continue;
11038 }
11039 ctxt->checkIndex = 0;
11040 #ifdef DEBUG_PUSH
11041 if (next == 0)
11042 xmlGenericError(xmlGenericErrorContext,
11043 "PP: lookup '%c' found at %d\n",
11044 first, base);
11045 else if (third == 0)
11046 xmlGenericError(xmlGenericErrorContext,
11047 "PP: lookup '%c%c' found at %d\n",
11048 first, next, base);
11049 else
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: lookup '%c%c%c' found at %d\n",
11052 first, next, third, base);
11053 #endif
11054 return(base - (in->cur - in->base));
11055 }
11056 }
11057 ctxt->checkIndex = base;
11058 #ifdef DEBUG_PUSH
11059 if (next == 0)
11060 xmlGenericError(xmlGenericErrorContext,
11061 "PP: lookup '%c' failed\n", first);
11062 else if (third == 0)
11063 xmlGenericError(xmlGenericErrorContext,
11064 "PP: lookup '%c%c' failed\n", first, next);
11065 else
11066 xmlGenericError(xmlGenericErrorContext,
11067 "PP: lookup '%c%c%c' failed\n", first, next, third);
11068 #endif
11069 return(-1);
11070 }
11071
11072 /**
11073 * xmlParseGetLasts:
11074 * @ctxt: an XML parser context
11075 * @lastlt: pointer to store the last '<' from the input
11076 * @lastgt: pointer to store the last '>' from the input
11077 *
11078 * Lookup the last < and > in the current chunk
11079 */
11080 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)11081 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11082 const xmlChar **lastgt) {
11083 const xmlChar *tmp;
11084
11085 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11086 xmlGenericError(xmlGenericErrorContext,
11087 "Internal error: xmlParseGetLasts\n");
11088 return;
11089 }
11090 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11091 tmp = ctxt->input->end;
11092 tmp--;
11093 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11094 if (tmp < ctxt->input->base) {
11095 *lastlt = NULL;
11096 *lastgt = NULL;
11097 } else {
11098 *lastlt = tmp;
11099 tmp++;
11100 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11101 if (*tmp == '\'') {
11102 tmp++;
11103 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11104 if (tmp < ctxt->input->end) tmp++;
11105 } else if (*tmp == '"') {
11106 tmp++;
11107 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11108 if (tmp < ctxt->input->end) tmp++;
11109 } else
11110 tmp++;
11111 }
11112 if (tmp < ctxt->input->end)
11113 *lastgt = tmp;
11114 else {
11115 tmp = *lastlt;
11116 tmp--;
11117 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11118 if (tmp >= ctxt->input->base)
11119 *lastgt = tmp;
11120 else
11121 *lastgt = NULL;
11122 }
11123 }
11124 } else {
11125 *lastlt = NULL;
11126 *lastgt = NULL;
11127 }
11128 }
11129 /**
11130 * xmlCheckCdataPush:
11131 * @cur: pointer to the block of characters
11132 * @len: length of the block in bytes
11133 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11134 *
11135 * Check that the block of characters is okay as SCdata content [20]
11136 *
11137 * Returns the number of bytes to pass if okay, a negative index where an
11138 * UTF-8 error occurred otherwise
11139 */
11140 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11141 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11142 int ix;
11143 unsigned char c;
11144 int codepoint;
11145
11146 if ((utf == NULL) || (len <= 0))
11147 return(0);
11148
11149 for (ix = 0; ix < len;) { /* string is 0-terminated */
11150 c = utf[ix];
11151 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11152 if (c >= 0x20)
11153 ix++;
11154 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11155 ix++;
11156 else
11157 return(-ix);
11158 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11159 if (ix + 2 > len) return(complete ? -ix : ix);
11160 if ((utf[ix+1] & 0xc0 ) != 0x80)
11161 return(-ix);
11162 codepoint = (utf[ix] & 0x1f) << 6;
11163 codepoint |= utf[ix+1] & 0x3f;
11164 if (!xmlIsCharQ(codepoint))
11165 return(-ix);
11166 ix += 2;
11167 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11168 if (ix + 3 > len) return(complete ? -ix : ix);
11169 if (((utf[ix+1] & 0xc0) != 0x80) ||
11170 ((utf[ix+2] & 0xc0) != 0x80))
11171 return(-ix);
11172 codepoint = (utf[ix] & 0xf) << 12;
11173 codepoint |= (utf[ix+1] & 0x3f) << 6;
11174 codepoint |= utf[ix+2] & 0x3f;
11175 if (!xmlIsCharQ(codepoint))
11176 return(-ix);
11177 ix += 3;
11178 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11179 if (ix + 4 > len) return(complete ? -ix : ix);
11180 if (((utf[ix+1] & 0xc0) != 0x80) ||
11181 ((utf[ix+2] & 0xc0) != 0x80) ||
11182 ((utf[ix+3] & 0xc0) != 0x80))
11183 return(-ix);
11184 codepoint = (utf[ix] & 0x7) << 18;
11185 codepoint |= (utf[ix+1] & 0x3f) << 12;
11186 codepoint |= (utf[ix+2] & 0x3f) << 6;
11187 codepoint |= utf[ix+3] & 0x3f;
11188 if (!xmlIsCharQ(codepoint))
11189 return(-ix);
11190 ix += 4;
11191 } else /* unknown encoding */
11192 return(-ix);
11193 }
11194 return(ix);
11195 }
11196
11197 /**
11198 * xmlParseTryOrFinish:
11199 * @ctxt: an XML parser context
11200 * @terminate: last chunk indicator
11201 *
11202 * Try to progress on parsing
11203 *
11204 * Returns zero if no parsing was possible
11205 */
11206 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11207 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11208 int ret = 0;
11209 int avail, tlen;
11210 xmlChar cur, next;
11211 const xmlChar *lastlt, *lastgt;
11212
11213 if (ctxt->input == NULL)
11214 return(0);
11215
11216 #ifdef DEBUG_PUSH
11217 switch (ctxt->instate) {
11218 case XML_PARSER_EOF:
11219 xmlGenericError(xmlGenericErrorContext,
11220 "PP: try EOF\n"); break;
11221 case XML_PARSER_START:
11222 xmlGenericError(xmlGenericErrorContext,
11223 "PP: try START\n"); break;
11224 case XML_PARSER_MISC:
11225 xmlGenericError(xmlGenericErrorContext,
11226 "PP: try MISC\n");break;
11227 case XML_PARSER_COMMENT:
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: try COMMENT\n");break;
11230 case XML_PARSER_PROLOG:
11231 xmlGenericError(xmlGenericErrorContext,
11232 "PP: try PROLOG\n");break;
11233 case XML_PARSER_START_TAG:
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: try START_TAG\n");break;
11236 case XML_PARSER_CONTENT:
11237 xmlGenericError(xmlGenericErrorContext,
11238 "PP: try CONTENT\n");break;
11239 case XML_PARSER_CDATA_SECTION:
11240 xmlGenericError(xmlGenericErrorContext,
11241 "PP: try CDATA_SECTION\n");break;
11242 case XML_PARSER_END_TAG:
11243 xmlGenericError(xmlGenericErrorContext,
11244 "PP: try END_TAG\n");break;
11245 case XML_PARSER_ENTITY_DECL:
11246 xmlGenericError(xmlGenericErrorContext,
11247 "PP: try ENTITY_DECL\n");break;
11248 case XML_PARSER_ENTITY_VALUE:
11249 xmlGenericError(xmlGenericErrorContext,
11250 "PP: try ENTITY_VALUE\n");break;
11251 case XML_PARSER_ATTRIBUTE_VALUE:
11252 xmlGenericError(xmlGenericErrorContext,
11253 "PP: try ATTRIBUTE_VALUE\n");break;
11254 case XML_PARSER_DTD:
11255 xmlGenericError(xmlGenericErrorContext,
11256 "PP: try DTD\n");break;
11257 case XML_PARSER_EPILOG:
11258 xmlGenericError(xmlGenericErrorContext,
11259 "PP: try EPILOG\n");break;
11260 case XML_PARSER_PI:
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: try PI\n");break;
11263 case XML_PARSER_IGNORE:
11264 xmlGenericError(xmlGenericErrorContext,
11265 "PP: try IGNORE\n");break;
11266 }
11267 #endif
11268
11269 if ((ctxt->input != NULL) &&
11270 (ctxt->input->cur - ctxt->input->base > 4096)) {
11271 xmlSHRINK(ctxt);
11272 ctxt->checkIndex = 0;
11273 }
11274 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11275
11276 while (ctxt->instate != XML_PARSER_EOF) {
11277 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11278 return(0);
11279
11280 if (ctxt->input == NULL) break;
11281 if (ctxt->input->buf == NULL)
11282 avail = ctxt->input->length -
11283 (ctxt->input->cur - ctxt->input->base);
11284 else {
11285 /*
11286 * If we are operating on converted input, try to flush
11287 * remaining chars to avoid them stalling in the non-converted
11288 * buffer. But do not do this in document start where
11289 * encoding="..." may not have been read and we work on a
11290 * guessed encoding.
11291 */
11292 if ((ctxt->instate != XML_PARSER_START) &&
11293 (ctxt->input->buf->raw != NULL) &&
11294 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11295 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11296 ctxt->input);
11297 size_t current = ctxt->input->cur - ctxt->input->base;
11298
11299 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11300 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11301 base, current);
11302 }
11303 avail = xmlBufUse(ctxt->input->buf->buffer) -
11304 (ctxt->input->cur - ctxt->input->base);
11305 }
11306 if (avail < 1)
11307 goto done;
11308 switch (ctxt->instate) {
11309 case XML_PARSER_EOF:
11310 /*
11311 * Document parsing is done !
11312 */
11313 goto done;
11314 case XML_PARSER_START:
11315 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11316 xmlChar start[4];
11317 xmlCharEncoding enc;
11318
11319 /*
11320 * Very first chars read from the document flow.
11321 */
11322 if (avail < 4)
11323 goto done;
11324
11325 /*
11326 * Get the 4 first bytes and decode the charset
11327 * if enc != XML_CHAR_ENCODING_NONE
11328 * plug some encoding conversion routines,
11329 * else xmlSwitchEncoding will set to (default)
11330 * UTF8.
11331 */
11332 start[0] = RAW;
11333 start[1] = NXT(1);
11334 start[2] = NXT(2);
11335 start[3] = NXT(3);
11336 enc = xmlDetectCharEncoding(start, 4);
11337 xmlSwitchEncoding(ctxt, enc);
11338 break;
11339 }
11340
11341 if (avail < 2)
11342 goto done;
11343 cur = ctxt->input->cur[0];
11344 next = ctxt->input->cur[1];
11345 if (cur == 0) {
11346 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11347 ctxt->sax->setDocumentLocator(ctxt->userData,
11348 &xmlDefaultSAXLocator);
11349 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11350 xmlHaltParser(ctxt);
11351 #ifdef DEBUG_PUSH
11352 xmlGenericError(xmlGenericErrorContext,
11353 "PP: entering EOF\n");
11354 #endif
11355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356 ctxt->sax->endDocument(ctxt->userData);
11357 goto done;
11358 }
11359 if ((cur == '<') && (next == '?')) {
11360 /* PI or XML decl */
11361 if (avail < 5) return(ret);
11362 if ((!terminate) &&
11363 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11364 return(ret);
11365 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11366 ctxt->sax->setDocumentLocator(ctxt->userData,
11367 &xmlDefaultSAXLocator);
11368 if ((ctxt->input->cur[2] == 'x') &&
11369 (ctxt->input->cur[3] == 'm') &&
11370 (ctxt->input->cur[4] == 'l') &&
11371 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11372 ret += 5;
11373 #ifdef DEBUG_PUSH
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: Parsing XML Decl\n");
11376 #endif
11377 xmlParseXMLDecl(ctxt);
11378 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11379 /*
11380 * The XML REC instructs us to stop parsing right
11381 * here
11382 */
11383 xmlHaltParser(ctxt);
11384 return(0);
11385 }
11386 ctxt->standalone = ctxt->input->standalone;
11387 if ((ctxt->encoding == NULL) &&
11388 (ctxt->input->encoding != NULL))
11389 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11390 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11391 (!ctxt->disableSAX))
11392 ctxt->sax->startDocument(ctxt->userData);
11393 ctxt->instate = XML_PARSER_MISC;
11394 #ifdef DEBUG_PUSH
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: entering MISC\n");
11397 #endif
11398 } else {
11399 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11400 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11401 (!ctxt->disableSAX))
11402 ctxt->sax->startDocument(ctxt->userData);
11403 ctxt->instate = XML_PARSER_MISC;
11404 #ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering MISC\n");
11407 #endif
11408 }
11409 } else {
11410 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11411 ctxt->sax->setDocumentLocator(ctxt->userData,
11412 &xmlDefaultSAXLocator);
11413 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11414 if (ctxt->version == NULL) {
11415 xmlErrMemory(ctxt, NULL);
11416 break;
11417 }
11418 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11419 (!ctxt->disableSAX))
11420 ctxt->sax->startDocument(ctxt->userData);
11421 ctxt->instate = XML_PARSER_MISC;
11422 #ifdef DEBUG_PUSH
11423 xmlGenericError(xmlGenericErrorContext,
11424 "PP: entering MISC\n");
11425 #endif
11426 }
11427 break;
11428 case XML_PARSER_START_TAG: {
11429 const xmlChar *name;
11430 const xmlChar *prefix = NULL;
11431 const xmlChar *URI = NULL;
11432 int line = ctxt->input->line;
11433 int nsNr = ctxt->nsNr;
11434
11435 if ((avail < 2) && (ctxt->inputNr == 1))
11436 goto done;
11437 cur = ctxt->input->cur[0];
11438 if (cur != '<') {
11439 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11440 xmlHaltParser(ctxt);
11441 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11442 ctxt->sax->endDocument(ctxt->userData);
11443 goto done;
11444 }
11445 if (!terminate) {
11446 if (ctxt->progressive) {
11447 /* > can be found unescaped in attribute values */
11448 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11449 goto done;
11450 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11451 goto done;
11452 }
11453 }
11454 if (ctxt->spaceNr == 0)
11455 spacePush(ctxt, -1);
11456 else if (*ctxt->space == -2)
11457 spacePush(ctxt, -1);
11458 else
11459 spacePush(ctxt, *ctxt->space);
11460 #ifdef LIBXML_SAX1_ENABLED
11461 if (ctxt->sax2)
11462 #endif /* LIBXML_SAX1_ENABLED */
11463 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11464 #ifdef LIBXML_SAX1_ENABLED
11465 else
11466 name = xmlParseStartTag(ctxt);
11467 #endif /* LIBXML_SAX1_ENABLED */
11468 if (ctxt->instate == XML_PARSER_EOF)
11469 goto done;
11470 if (name == NULL) {
11471 spacePop(ctxt);
11472 xmlHaltParser(ctxt);
11473 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11474 ctxt->sax->endDocument(ctxt->userData);
11475 goto done;
11476 }
11477 #ifdef LIBXML_VALID_ENABLED
11478 /*
11479 * [ VC: Root Element Type ]
11480 * The Name in the document type declaration must match
11481 * the element type of the root element.
11482 */
11483 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11484 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11485 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11486 #endif /* LIBXML_VALID_ENABLED */
11487
11488 /*
11489 * Check for an Empty Element.
11490 */
11491 if ((RAW == '/') && (NXT(1) == '>')) {
11492 SKIP(2);
11493
11494 if (ctxt->sax2) {
11495 if ((ctxt->sax != NULL) &&
11496 (ctxt->sax->endElementNs != NULL) &&
11497 (!ctxt->disableSAX))
11498 ctxt->sax->endElementNs(ctxt->userData, name,
11499 prefix, URI);
11500 if (ctxt->nsNr - nsNr > 0)
11501 nsPop(ctxt, ctxt->nsNr - nsNr);
11502 #ifdef LIBXML_SAX1_ENABLED
11503 } else {
11504 if ((ctxt->sax != NULL) &&
11505 (ctxt->sax->endElement != NULL) &&
11506 (!ctxt->disableSAX))
11507 ctxt->sax->endElement(ctxt->userData, name);
11508 #endif /* LIBXML_SAX1_ENABLED */
11509 }
11510 if (ctxt->instate == XML_PARSER_EOF)
11511 goto done;
11512 spacePop(ctxt);
11513 if (ctxt->nameNr == 0) {
11514 ctxt->instate = XML_PARSER_EPILOG;
11515 } else {
11516 ctxt->instate = XML_PARSER_CONTENT;
11517 }
11518 ctxt->progressive = 1;
11519 break;
11520 }
11521 if (RAW == '>') {
11522 NEXT;
11523 } else {
11524 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11525 "Couldn't find end of Start Tag %s\n",
11526 name);
11527 nodePop(ctxt);
11528 spacePop(ctxt);
11529 }
11530 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11531
11532 ctxt->instate = XML_PARSER_CONTENT;
11533 ctxt->progressive = 1;
11534 break;
11535 }
11536 case XML_PARSER_CONTENT: {
11537 const xmlChar *test;
11538 unsigned int cons;
11539 if ((avail < 2) && (ctxt->inputNr == 1))
11540 goto done;
11541 cur = ctxt->input->cur[0];
11542 next = ctxt->input->cur[1];
11543
11544 test = CUR_PTR;
11545 cons = ctxt->input->consumed;
11546 if ((cur == '<') && (next == '/')) {
11547 ctxt->instate = XML_PARSER_END_TAG;
11548 break;
11549 } else if ((cur == '<') && (next == '?')) {
11550 if ((!terminate) &&
11551 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11552 ctxt->progressive = XML_PARSER_PI;
11553 goto done;
11554 }
11555 xmlParsePI(ctxt);
11556 ctxt->instate = XML_PARSER_CONTENT;
11557 ctxt->progressive = 1;
11558 } else if ((cur == '<') && (next != '!')) {
11559 ctxt->instate = XML_PARSER_START_TAG;
11560 break;
11561 } else if ((cur == '<') && (next == '!') &&
11562 (ctxt->input->cur[2] == '-') &&
11563 (ctxt->input->cur[3] == '-')) {
11564 int term;
11565
11566 if (avail < 4)
11567 goto done;
11568 ctxt->input->cur += 4;
11569 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11570 ctxt->input->cur -= 4;
11571 if ((!terminate) && (term < 0)) {
11572 ctxt->progressive = XML_PARSER_COMMENT;
11573 goto done;
11574 }
11575 xmlParseComment(ctxt);
11576 ctxt->instate = XML_PARSER_CONTENT;
11577 ctxt->progressive = 1;
11578 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11579 (ctxt->input->cur[2] == '[') &&
11580 (ctxt->input->cur[3] == 'C') &&
11581 (ctxt->input->cur[4] == 'D') &&
11582 (ctxt->input->cur[5] == 'A') &&
11583 (ctxt->input->cur[6] == 'T') &&
11584 (ctxt->input->cur[7] == 'A') &&
11585 (ctxt->input->cur[8] == '[')) {
11586 SKIP(9);
11587 ctxt->instate = XML_PARSER_CDATA_SECTION;
11588 break;
11589 } else if ((cur == '<') && (next == '!') &&
11590 (avail < 9)) {
11591 goto done;
11592 } else if (cur == '&') {
11593 if ((!terminate) &&
11594 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11595 goto done;
11596 xmlParseReference(ctxt);
11597 } else {
11598 /* TODO Avoid the extra copy, handle directly !!! */
11599 /*
11600 * Goal of the following test is:
11601 * - minimize calls to the SAX 'character' callback
11602 * when they are mergeable
11603 * - handle an problem for isBlank when we only parse
11604 * a sequence of blank chars and the next one is
11605 * not available to check against '<' presence.
11606 * - tries to homogenize the differences in SAX
11607 * callbacks between the push and pull versions
11608 * of the parser.
11609 */
11610 if ((ctxt->inputNr == 1) &&
11611 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11612 if (!terminate) {
11613 if (ctxt->progressive) {
11614 if ((lastlt == NULL) ||
11615 (ctxt->input->cur > lastlt))
11616 goto done;
11617 } else if (xmlParseLookupSequence(ctxt,
11618 '<', 0, 0) < 0) {
11619 goto done;
11620 }
11621 }
11622 }
11623 ctxt->checkIndex = 0;
11624 xmlParseCharData(ctxt, 0);
11625 }
11626 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11627 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11628 "detected an error in element content\n");
11629 xmlHaltParser(ctxt);
11630 break;
11631 }
11632 break;
11633 }
11634 case XML_PARSER_END_TAG:
11635 if (avail < 2)
11636 goto done;
11637 if (!terminate) {
11638 if (ctxt->progressive) {
11639 /* > can be found unescaped in attribute values */
11640 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11641 goto done;
11642 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11643 goto done;
11644 }
11645 }
11646 if (ctxt->sax2) {
11647 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11648 nameNsPop(ctxt);
11649 }
11650 #ifdef LIBXML_SAX1_ENABLED
11651 else
11652 xmlParseEndTag1(ctxt, 0);
11653 #endif /* LIBXML_SAX1_ENABLED */
11654 if (ctxt->instate == XML_PARSER_EOF) {
11655 /* Nothing */
11656 } else if (ctxt->nameNr == 0) {
11657 ctxt->instate = XML_PARSER_EPILOG;
11658 } else {
11659 ctxt->instate = XML_PARSER_CONTENT;
11660 }
11661 break;
11662 case XML_PARSER_CDATA_SECTION: {
11663 /*
11664 * The Push mode need to have the SAX callback for
11665 * cdataBlock merge back contiguous callbacks.
11666 */
11667 int base;
11668
11669 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11670 if (base < 0) {
11671 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11672 int tmp;
11673
11674 tmp = xmlCheckCdataPush(ctxt->input->cur,
11675 XML_PARSER_BIG_BUFFER_SIZE, 0);
11676 if (tmp < 0) {
11677 tmp = -tmp;
11678 ctxt->input->cur += tmp;
11679 goto encoding_error;
11680 }
11681 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11682 if (ctxt->sax->cdataBlock != NULL)
11683 ctxt->sax->cdataBlock(ctxt->userData,
11684 ctxt->input->cur, tmp);
11685 else if (ctxt->sax->characters != NULL)
11686 ctxt->sax->characters(ctxt->userData,
11687 ctxt->input->cur, tmp);
11688 }
11689 if (ctxt->instate == XML_PARSER_EOF)
11690 goto done;
11691 SKIPL(tmp);
11692 ctxt->checkIndex = 0;
11693 }
11694 goto done;
11695 } else {
11696 int tmp;
11697
11698 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11699 if ((tmp < 0) || (tmp != base)) {
11700 tmp = -tmp;
11701 ctxt->input->cur += tmp;
11702 goto encoding_error;
11703 }
11704 if ((ctxt->sax != NULL) && (base == 0) &&
11705 (ctxt->sax->cdataBlock != NULL) &&
11706 (!ctxt->disableSAX)) {
11707 /*
11708 * Special case to provide identical behaviour
11709 * between pull and push parsers on enpty CDATA
11710 * sections
11711 */
11712 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11713 (!strncmp((const char *)&ctxt->input->cur[-9],
11714 "<![CDATA[", 9)))
11715 ctxt->sax->cdataBlock(ctxt->userData,
11716 BAD_CAST "", 0);
11717 } else if ((ctxt->sax != NULL) && (base > 0) &&
11718 (!ctxt->disableSAX)) {
11719 if (ctxt->sax->cdataBlock != NULL)
11720 ctxt->sax->cdataBlock(ctxt->userData,
11721 ctxt->input->cur, base);
11722 else if (ctxt->sax->characters != NULL)
11723 ctxt->sax->characters(ctxt->userData,
11724 ctxt->input->cur, base);
11725 }
11726 if (ctxt->instate == XML_PARSER_EOF)
11727 goto done;
11728 SKIPL(base + 3);
11729 ctxt->checkIndex = 0;
11730 ctxt->instate = XML_PARSER_CONTENT;
11731 #ifdef DEBUG_PUSH
11732 xmlGenericError(xmlGenericErrorContext,
11733 "PP: entering CONTENT\n");
11734 #endif
11735 }
11736 break;
11737 }
11738 case XML_PARSER_MISC:
11739 SKIP_BLANKS;
11740 if (ctxt->input->buf == NULL)
11741 avail = ctxt->input->length -
11742 (ctxt->input->cur - ctxt->input->base);
11743 else
11744 avail = xmlBufUse(ctxt->input->buf->buffer) -
11745 (ctxt->input->cur - ctxt->input->base);
11746 if (avail < 2)
11747 goto done;
11748 cur = ctxt->input->cur[0];
11749 next = ctxt->input->cur[1];
11750 if ((cur == '<') && (next == '?')) {
11751 if ((!terminate) &&
11752 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11753 ctxt->progressive = XML_PARSER_PI;
11754 goto done;
11755 }
11756 #ifdef DEBUG_PUSH
11757 xmlGenericError(xmlGenericErrorContext,
11758 "PP: Parsing PI\n");
11759 #endif
11760 xmlParsePI(ctxt);
11761 if (ctxt->instate == XML_PARSER_EOF)
11762 goto done;
11763 ctxt->instate = XML_PARSER_MISC;
11764 ctxt->progressive = 1;
11765 ctxt->checkIndex = 0;
11766 } else if ((cur == '<') && (next == '!') &&
11767 (ctxt->input->cur[2] == '-') &&
11768 (ctxt->input->cur[3] == '-')) {
11769 if ((!terminate) &&
11770 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11771 ctxt->progressive = XML_PARSER_COMMENT;
11772 goto done;
11773 }
11774 #ifdef DEBUG_PUSH
11775 xmlGenericError(xmlGenericErrorContext,
11776 "PP: Parsing Comment\n");
11777 #endif
11778 xmlParseComment(ctxt);
11779 if (ctxt->instate == XML_PARSER_EOF)
11780 goto done;
11781 ctxt->instate = XML_PARSER_MISC;
11782 ctxt->progressive = 1;
11783 ctxt->checkIndex = 0;
11784 } else if ((cur == '<') && (next == '!') &&
11785 (ctxt->input->cur[2] == 'D') &&
11786 (ctxt->input->cur[3] == 'O') &&
11787 (ctxt->input->cur[4] == 'C') &&
11788 (ctxt->input->cur[5] == 'T') &&
11789 (ctxt->input->cur[6] == 'Y') &&
11790 (ctxt->input->cur[7] == 'P') &&
11791 (ctxt->input->cur[8] == 'E')) {
11792 if ((!terminate) &&
11793 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11794 ctxt->progressive = XML_PARSER_DTD;
11795 goto done;
11796 }
11797 #ifdef DEBUG_PUSH
11798 xmlGenericError(xmlGenericErrorContext,
11799 "PP: Parsing internal subset\n");
11800 #endif
11801 ctxt->inSubset = 1;
11802 ctxt->progressive = 0;
11803 ctxt->checkIndex = 0;
11804 xmlParseDocTypeDecl(ctxt);
11805 if (ctxt->instate == XML_PARSER_EOF)
11806 goto done;
11807 if (RAW == '[') {
11808 ctxt->instate = XML_PARSER_DTD;
11809 #ifdef DEBUG_PUSH
11810 xmlGenericError(xmlGenericErrorContext,
11811 "PP: entering DTD\n");
11812 #endif
11813 } else {
11814 /*
11815 * Create and update the external subset.
11816 */
11817 ctxt->inSubset = 2;
11818 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11819 (ctxt->sax->externalSubset != NULL))
11820 ctxt->sax->externalSubset(ctxt->userData,
11821 ctxt->intSubName, ctxt->extSubSystem,
11822 ctxt->extSubURI);
11823 ctxt->inSubset = 0;
11824 xmlCleanSpecialAttr(ctxt);
11825 ctxt->instate = XML_PARSER_PROLOG;
11826 #ifdef DEBUG_PUSH
11827 xmlGenericError(xmlGenericErrorContext,
11828 "PP: entering PROLOG\n");
11829 #endif
11830 }
11831 } else if ((cur == '<') && (next == '!') &&
11832 (avail < 9)) {
11833 goto done;
11834 } else {
11835 ctxt->instate = XML_PARSER_START_TAG;
11836 ctxt->progressive = XML_PARSER_START_TAG;
11837 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11838 #ifdef DEBUG_PUSH
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: entering START_TAG\n");
11841 #endif
11842 }
11843 break;
11844 case XML_PARSER_PROLOG:
11845 SKIP_BLANKS;
11846 if (ctxt->input->buf == NULL)
11847 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11848 else
11849 avail = xmlBufUse(ctxt->input->buf->buffer) -
11850 (ctxt->input->cur - ctxt->input->base);
11851 if (avail < 2)
11852 goto done;
11853 cur = ctxt->input->cur[0];
11854 next = ctxt->input->cur[1];
11855 if ((cur == '<') && (next == '?')) {
11856 if ((!terminate) &&
11857 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11858 ctxt->progressive = XML_PARSER_PI;
11859 goto done;
11860 }
11861 #ifdef DEBUG_PUSH
11862 xmlGenericError(xmlGenericErrorContext,
11863 "PP: Parsing PI\n");
11864 #endif
11865 xmlParsePI(ctxt);
11866 if (ctxt->instate == XML_PARSER_EOF)
11867 goto done;
11868 ctxt->instate = XML_PARSER_PROLOG;
11869 ctxt->progressive = 1;
11870 } else if ((cur == '<') && (next == '!') &&
11871 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11872 if ((!terminate) &&
11873 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11874 ctxt->progressive = XML_PARSER_COMMENT;
11875 goto done;
11876 }
11877 #ifdef DEBUG_PUSH
11878 xmlGenericError(xmlGenericErrorContext,
11879 "PP: Parsing Comment\n");
11880 #endif
11881 xmlParseComment(ctxt);
11882 if (ctxt->instate == XML_PARSER_EOF)
11883 goto done;
11884 ctxt->instate = XML_PARSER_PROLOG;
11885 ctxt->progressive = 1;
11886 } else if ((cur == '<') && (next == '!') &&
11887 (avail < 4)) {
11888 goto done;
11889 } else {
11890 ctxt->instate = XML_PARSER_START_TAG;
11891 if (ctxt->progressive == 0)
11892 ctxt->progressive = XML_PARSER_START_TAG;
11893 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11894 #ifdef DEBUG_PUSH
11895 xmlGenericError(xmlGenericErrorContext,
11896 "PP: entering START_TAG\n");
11897 #endif
11898 }
11899 break;
11900 case XML_PARSER_EPILOG:
11901 SKIP_BLANKS;
11902 if (ctxt->input->buf == NULL)
11903 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11904 else
11905 avail = xmlBufUse(ctxt->input->buf->buffer) -
11906 (ctxt->input->cur - ctxt->input->base);
11907 if (avail < 2)
11908 goto done;
11909 cur = ctxt->input->cur[0];
11910 next = ctxt->input->cur[1];
11911 if ((cur == '<') && (next == '?')) {
11912 if ((!terminate) &&
11913 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11914 ctxt->progressive = XML_PARSER_PI;
11915 goto done;
11916 }
11917 #ifdef DEBUG_PUSH
11918 xmlGenericError(xmlGenericErrorContext,
11919 "PP: Parsing PI\n");
11920 #endif
11921 xmlParsePI(ctxt);
11922 if (ctxt->instate == XML_PARSER_EOF)
11923 goto done;
11924 ctxt->instate = XML_PARSER_EPILOG;
11925 ctxt->progressive = 1;
11926 } else if ((cur == '<') && (next == '!') &&
11927 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11928 if ((!terminate) &&
11929 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11930 ctxt->progressive = XML_PARSER_COMMENT;
11931 goto done;
11932 }
11933 #ifdef DEBUG_PUSH
11934 xmlGenericError(xmlGenericErrorContext,
11935 "PP: Parsing Comment\n");
11936 #endif
11937 xmlParseComment(ctxt);
11938 if (ctxt->instate == XML_PARSER_EOF)
11939 goto done;
11940 ctxt->instate = XML_PARSER_EPILOG;
11941 ctxt->progressive = 1;
11942 } else if ((cur == '<') && (next == '!') &&
11943 (avail < 4)) {
11944 goto done;
11945 } else {
11946 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11947 xmlHaltParser(ctxt);
11948 #ifdef DEBUG_PUSH
11949 xmlGenericError(xmlGenericErrorContext,
11950 "PP: entering EOF\n");
11951 #endif
11952 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11953 ctxt->sax->endDocument(ctxt->userData);
11954 goto done;
11955 }
11956 break;
11957 case XML_PARSER_DTD: {
11958 /*
11959 * Sorry but progressive parsing of the internal subset
11960 * is not expected to be supported. We first check that
11961 * the full content of the internal subset is available and
11962 * the parsing is launched only at that point.
11963 * Internal subset ends up with "']' S? '>'" in an unescaped
11964 * section and not in a ']]>' sequence which are conditional
11965 * sections (whoever argued to keep that crap in XML deserve
11966 * a place in hell !).
11967 */
11968 int base, i;
11969 xmlChar *buf;
11970 xmlChar quote = 0;
11971 size_t use;
11972
11973 base = ctxt->input->cur - ctxt->input->base;
11974 if (base < 0) return(0);
11975 if (ctxt->checkIndex > base)
11976 base = ctxt->checkIndex;
11977 buf = xmlBufContent(ctxt->input->buf->buffer);
11978 use = xmlBufUse(ctxt->input->buf->buffer);
11979 for (;(unsigned int) base < use; base++) {
11980 if (quote != 0) {
11981 if (buf[base] == quote)
11982 quote = 0;
11983 continue;
11984 }
11985 if ((quote == 0) && (buf[base] == '<')) {
11986 int found = 0;
11987 /* special handling of comments */
11988 if (((unsigned int) base + 4 < use) &&
11989 (buf[base + 1] == '!') &&
11990 (buf[base + 2] == '-') &&
11991 (buf[base + 3] == '-')) {
11992 for (;(unsigned int) base + 3 < use; base++) {
11993 if ((buf[base] == '-') &&
11994 (buf[base + 1] == '-') &&
11995 (buf[base + 2] == '>')) {
11996 found = 1;
11997 base += 2;
11998 break;
11999 }
12000 }
12001 if (!found) {
12002 #if 0
12003 fprintf(stderr, "unfinished comment\n");
12004 #endif
12005 break; /* for */
12006 }
12007 continue;
12008 }
12009 }
12010 if (buf[base] == '"') {
12011 quote = '"';
12012 continue;
12013 }
12014 if (buf[base] == '\'') {
12015 quote = '\'';
12016 continue;
12017 }
12018 if (buf[base] == ']') {
12019 #if 0
12020 fprintf(stderr, "%c%c%c%c: ", buf[base],
12021 buf[base + 1], buf[base + 2], buf[base + 3]);
12022 #endif
12023 if ((unsigned int) base +1 >= use)
12024 break;
12025 if (buf[base + 1] == ']') {
12026 /* conditional crap, skip both ']' ! */
12027 base++;
12028 continue;
12029 }
12030 for (i = 1; (unsigned int) base + i < use; i++) {
12031 if (buf[base + i] == '>') {
12032 #if 0
12033 fprintf(stderr, "found\n");
12034 #endif
12035 goto found_end_int_subset;
12036 }
12037 if (!IS_BLANK_CH(buf[base + i])) {
12038 #if 0
12039 fprintf(stderr, "not found\n");
12040 #endif
12041 goto not_end_of_int_subset;
12042 }
12043 }
12044 #if 0
12045 fprintf(stderr, "end of stream\n");
12046 #endif
12047 break;
12048
12049 }
12050 not_end_of_int_subset:
12051 continue; /* for */
12052 }
12053 /*
12054 * We didn't found the end of the Internal subset
12055 */
12056 if (quote == 0)
12057 ctxt->checkIndex = base;
12058 else
12059 ctxt->checkIndex = 0;
12060 #ifdef DEBUG_PUSH
12061 if (next == 0)
12062 xmlGenericError(xmlGenericErrorContext,
12063 "PP: lookup of int subset end filed\n");
12064 #endif
12065 goto done;
12066
12067 found_end_int_subset:
12068 ctxt->checkIndex = 0;
12069 xmlParseInternalSubset(ctxt);
12070 if (ctxt->instate == XML_PARSER_EOF)
12071 goto done;
12072 ctxt->inSubset = 2;
12073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12074 (ctxt->sax->externalSubset != NULL))
12075 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12076 ctxt->extSubSystem, ctxt->extSubURI);
12077 ctxt->inSubset = 0;
12078 xmlCleanSpecialAttr(ctxt);
12079 if (ctxt->instate == XML_PARSER_EOF)
12080 goto done;
12081 ctxt->instate = XML_PARSER_PROLOG;
12082 ctxt->checkIndex = 0;
12083 #ifdef DEBUG_PUSH
12084 xmlGenericError(xmlGenericErrorContext,
12085 "PP: entering PROLOG\n");
12086 #endif
12087 break;
12088 }
12089 case XML_PARSER_COMMENT:
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: internal error, state == COMMENT\n");
12092 ctxt->instate = XML_PARSER_CONTENT;
12093 #ifdef DEBUG_PUSH
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: entering CONTENT\n");
12096 #endif
12097 break;
12098 case XML_PARSER_IGNORE:
12099 xmlGenericError(xmlGenericErrorContext,
12100 "PP: internal error, state == IGNORE");
12101 ctxt->instate = XML_PARSER_DTD;
12102 #ifdef DEBUG_PUSH
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: entering DTD\n");
12105 #endif
12106 break;
12107 case XML_PARSER_PI:
12108 xmlGenericError(xmlGenericErrorContext,
12109 "PP: internal error, state == PI\n");
12110 ctxt->instate = XML_PARSER_CONTENT;
12111 #ifdef DEBUG_PUSH
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: entering CONTENT\n");
12114 #endif
12115 break;
12116 case XML_PARSER_ENTITY_DECL:
12117 xmlGenericError(xmlGenericErrorContext,
12118 "PP: internal error, state == ENTITY_DECL\n");
12119 ctxt->instate = XML_PARSER_DTD;
12120 #ifdef DEBUG_PUSH
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: entering DTD\n");
12123 #endif
12124 break;
12125 case XML_PARSER_ENTITY_VALUE:
12126 xmlGenericError(xmlGenericErrorContext,
12127 "PP: internal error, state == ENTITY_VALUE\n");
12128 ctxt->instate = XML_PARSER_CONTENT;
12129 #ifdef DEBUG_PUSH
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: entering DTD\n");
12132 #endif
12133 break;
12134 case XML_PARSER_ATTRIBUTE_VALUE:
12135 xmlGenericError(xmlGenericErrorContext,
12136 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12137 ctxt->instate = XML_PARSER_START_TAG;
12138 #ifdef DEBUG_PUSH
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: entering START_TAG\n");
12141 #endif
12142 break;
12143 case XML_PARSER_SYSTEM_LITERAL:
12144 xmlGenericError(xmlGenericErrorContext,
12145 "PP: internal error, state == SYSTEM_LITERAL\n");
12146 ctxt->instate = XML_PARSER_START_TAG;
12147 #ifdef DEBUG_PUSH
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: entering START_TAG\n");
12150 #endif
12151 break;
12152 case XML_PARSER_PUBLIC_LITERAL:
12153 xmlGenericError(xmlGenericErrorContext,
12154 "PP: internal error, state == PUBLIC_LITERAL\n");
12155 ctxt->instate = XML_PARSER_START_TAG;
12156 #ifdef DEBUG_PUSH
12157 xmlGenericError(xmlGenericErrorContext,
12158 "PP: entering START_TAG\n");
12159 #endif
12160 break;
12161 }
12162 }
12163 done:
12164 #ifdef DEBUG_PUSH
12165 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12166 #endif
12167 return(ret);
12168 encoding_error:
12169 {
12170 char buffer[150];
12171
12172 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12173 ctxt->input->cur[0], ctxt->input->cur[1],
12174 ctxt->input->cur[2], ctxt->input->cur[3]);
12175 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12176 "Input is not proper UTF-8, indicate encoding !\n%s",
12177 BAD_CAST buffer, NULL);
12178 }
12179 return(0);
12180 }
12181
12182 /**
12183 * xmlParseCheckTransition:
12184 * @ctxt: an XML parser context
12185 * @chunk: a char array
12186 * @size: the size in byte of the chunk
12187 *
12188 * Check depending on the current parser state if the chunk given must be
12189 * processed immediately or one need more data to advance on parsing.
12190 *
12191 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12192 */
12193 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12194 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12195 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12196 return(-1);
12197 if (ctxt->instate == XML_PARSER_START_TAG) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12201 }
12202 if (ctxt->progressive == XML_PARSER_COMMENT) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12206 }
12207 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12211 }
12212 if (ctxt->progressive == XML_PARSER_PI) {
12213 if (memchr(chunk, '>', size) != NULL)
12214 return(1);
12215 return(0);
12216 }
12217 if (ctxt->instate == XML_PARSER_END_TAG) {
12218 if (memchr(chunk, '>', size) != NULL)
12219 return(1);
12220 return(0);
12221 }
12222 if ((ctxt->progressive == XML_PARSER_DTD) ||
12223 (ctxt->instate == XML_PARSER_DTD)) {
12224 if (memchr(chunk, '>', size) != NULL)
12225 return(1);
12226 return(0);
12227 }
12228 return(1);
12229 }
12230
/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in byte of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a Chunk of memory: push @chunk into the input buffer of @ctxt and
 * run the progressive parser on what is available. When @terminate is
 * non-zero the document is considered complete: end-of-document checks
 * are performed and the endDocument SAX callback is invoked.
 *
 * Returns zero if no error, the xmlParserErrors otherwise. Special
 *         values: XML_ERR_INTERNAL_ERROR when @ctxt is NULL, -1 when the
 *         parser is already in the EOF state on entry, XML_PARSER_EOF
 *         when pushing the data into the input buffer fails and
 *         XML_ERR_INVALID_ENCODING when the encoder reports an error.
 */
int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    /* set when a trailing '\r' was held back from the chunk */
    int end_in_lf = 0;
    /* bytes of the chunk postponed to a second pass (see xmldecl_done) */
    int remain = 0;
    /* input buffer usage before and after the push */
    size_t old_avail = 0;
    size_t avail = 0;

    if (ctxt == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    /* a previous error already disabled SAX: just report it again */
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (ctxt->instate == XML_PARSER_START)
        xmlDetectSAX2(ctxt);
    /*
     * If this is not the last chunk and it ends with a carriage return,
     * hold that byte back; it is pushed on its own once the rest of the
     * chunk has been processed (see the end_in_lf handling below).
     */
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
        (chunk[size - 1] == '\r')) {
        end_in_lf = 1;
        size--;
    }

/* re-entered through goto to push the part of the chunk left in remain */
xmldecl_done:

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Record base and cur as offsets so they can be re-applied after
         * the push - presumably because the push may move the buffer
         * content (TODO confirm against xmlBuf implementation).
         */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;

        old_avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Specific handling if we autodetected an encoding, we should not
         * push more than the first line ... which depend on the encoding
         * And only push the rest once the final encoding was detected
         */
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* cap of the first push: 45 bytes, 90 for UTF-16, 180 for UCS-4 */
            unsigned int len = 45;

            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF-16")) ||
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF16")))
                len = 90;
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS-4")) ||
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS4")))
                len = 180;

            /* account for raw bytes that were already consumed */
            if (ctxt->input->buf->rawconsumed < len)
                len -= ctxt->input->buf->rawconsumed;

            /*
             * Change size for reading the initial declaration only
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
             * will blindly copy extra bytes from memory.
             */
            if ((unsigned int) size > len) {
                remain = size - len;
                size = len;
            } else {
                remain = 0;
            }
        }
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
        if (res < 0) {
            ctxt->errNo = XML_PARSER_EOF;
            xmlHaltParser(ctxt);
            return (XML_PARSER_EOF);
        }
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

    } else if (ctxt->instate != XML_PARSER_EOF) {
        /*
         * Nothing new to push: when an encoder, an output buffer and a
         * raw buffer are all present, run the encoder on the input.
         */
        if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
            xmlParserInputBufferPtr in = ctxt->input->buf;
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                    (in->raw != NULL)) {
                int nbchars;
                size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
                size_t current = ctxt->input->cur - ctxt->input->base;

                nbchars = xmlCharEncInput(in, terminate);
                xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
                if (nbchars < 0) {
                    /* TODO 2.6.0 */
                    xmlGenericError(xmlGenericErrorContext,
                                    "xmlParseChunk: encoder error\n");
                    xmlHaltParser(ctxt);
                    return(XML_ERR_INVALID_ENCODING);
                }
            }
        }
    }
    if (remain != 0) {
        /* only the capped first part was pushed: parse it, never finish */
        xmlParseTryOrFinish(ctxt, 0);
    } else {
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
            avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Depending on the current state it may not be such
         * a good idea to try parsing if there is nothing in the chunk
         * which would be worth doing a parser state transition and we
         * need to wait for more data
         */
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
            (old_avail == 0) || (avail == 0) ||
            (xmlParseCheckTransition(ctxt,
                       (const char *)&ctxt->input->base[old_avail],
                                     avail - old_avail)))
            xmlParseTryOrFinish(ctxt, terminate);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ctxt->errNo);

    /*
     * Unless XML_PARSE_HUGE is set, refuse to keep more than
     * XML_MAX_LOOKUP_LIMIT bytes either pending after, or already
     * consumed before, the current position.
     */
    if ((ctxt->input != NULL) &&
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
    }
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);

    /* second pass: push what was left out of the capped first push */
    if (remain != 0) {
        chunk += size;
        size = remain;
        remain = 0;
        goto xmldecl_done;
    }
    /* now push the carriage return that was held back on entry */
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                         ctxt->input);
        size_t current = ctxt->input->cur - ctxt->input->base;

        xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
                              base, current);
    }
    if (terminate) {
        /*
         * Check for termination
         */
        int cur_avail = 0;

        /* bytes still unparsed in the current input */
        if (ctxt->input != NULL) {
            if (ctxt->input->buf == NULL)
                cur_avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
            else
                cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
                                      (ctxt->input->cur - ctxt->input->base);
        }

        /* the document must have reached the epilog (or EOF) ... */
        if ((ctxt->instate != XML_PARSER_EOF) &&
            (ctxt->instate != XML_PARSER_EPILOG)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        /* ... and no unparsed data may be left over in the epilog */
        if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF) {
            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                ctxt->sax->endDocument(ctxt->userData);
        }
        ctxt->instate = XML_PARSER_EOF;
    }
    if (ctxt->wellFormed == 0)
        return((xmlParserErrors) ctxt->errNo);
    else
        return(0);
}
12421
12422 /************************************************************************
12423 * *
12424 * I/O front end functions to the parser *
12425 * *
12426 ************************************************************************/
12427
12428 /**
12429 * xmlCreatePushParserCtxt:
12430 * @sax: a SAX handler
12431 * @user_data: The user data returned on SAX callbacks
12432 * @chunk: a pointer to an array of chars
12433 * @size: number of chars in the array
12434 * @filename: an optional file name or URI
12435 *
12436 * Create a parser context for using the XML parser in push mode.
12437 * If @buffer and @size are non-NULL, the data is used to detect
12438 * the encoding. The remaining characters will be parsed so they
12439 * don't need to be fed in again through xmlParseChunk.
12440 * To allow content encoding detection, @size should be >= 4
12441 * The value of @filename is used for fetching external entities
12442 * and error/warning reports.
12443 *
12444 * Returns the new parser context or NULL
12445 */
12446
12447 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12448 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12449 const char *chunk, int size, const char *filename) {
12450 xmlParserCtxtPtr ctxt;
12451 xmlParserInputPtr inputStream;
12452 xmlParserInputBufferPtr buf;
12453 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12454
12455 /*
12456 * plug some encoding conversion routines
12457 */
12458 if ((chunk != NULL) && (size >= 4))
12459 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12460
12461 buf = xmlAllocParserInputBuffer(enc);
12462 if (buf == NULL) return(NULL);
12463
12464 ctxt = xmlNewParserCtxt();
12465 if (ctxt == NULL) {
12466 xmlErrMemory(NULL, "creating parser: out of memory\n");
12467 xmlFreeParserInputBuffer(buf);
12468 return(NULL);
12469 }
12470 ctxt->dictNames = 1;
12471 if (sax != NULL) {
12472 #ifdef LIBXML_SAX1_ENABLED
12473 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12474 #endif /* LIBXML_SAX1_ENABLED */
12475 xmlFree(ctxt->sax);
12476 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12477 if (ctxt->sax == NULL) {
12478 xmlErrMemory(ctxt, NULL);
12479 xmlFreeParserInputBuffer(buf);
12480 xmlFreeParserCtxt(ctxt);
12481 return(NULL);
12482 }
12483 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12484 if (sax->initialized == XML_SAX2_MAGIC)
12485 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12486 else
12487 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12488 if (user_data != NULL)
12489 ctxt->userData = user_data;
12490 }
12491 if (filename == NULL) {
12492 ctxt->directory = NULL;
12493 } else {
12494 ctxt->directory = xmlParserGetDirectory(filename);
12495 }
12496
12497 inputStream = xmlNewInputStream(ctxt);
12498 if (inputStream == NULL) {
12499 xmlFreeParserCtxt(ctxt);
12500 xmlFreeParserInputBuffer(buf);
12501 return(NULL);
12502 }
12503
12504 if (filename == NULL)
12505 inputStream->filename = NULL;
12506 else {
12507 inputStream->filename = (char *)
12508 xmlCanonicPath((const xmlChar *) filename);
12509 if (inputStream->filename == NULL) {
12510 xmlFreeParserCtxt(ctxt);
12511 xmlFreeParserInputBuffer(buf);
12512 return(NULL);
12513 }
12514 }
12515 inputStream->buf = buf;
12516 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12517 inputPush(ctxt, inputStream);
12518
12519 /*
12520 * If the caller didn't provide an initial 'chunk' for determining
12521 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12522 * that it can be automatically determined later
12523 */
12524 if ((size == 0) || (chunk == NULL)) {
12525 ctxt->charset = XML_CHAR_ENCODING_NONE;
12526 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12527 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12528 size_t cur = ctxt->input->cur - ctxt->input->base;
12529
12530 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12531
12532 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12533 #ifdef DEBUG_PUSH
12534 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12535 #endif
12536 }
12537
12538 if (enc != XML_CHAR_ENCODING_NONE) {
12539 xmlSwitchEncoding(ctxt, enc);
12540 }
12541
12542 return(ctxt);
12543 }
12544 #endif /* LIBXML_PUSH_ENABLED */
12545
12546 /**
12547 * xmlHaltParser:
12548 * @ctxt: an XML parser context
12549 *
12550 * Blocks further parser processing don't override error
12551 * for internal use
12552 */
12553 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12554 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12555 if (ctxt == NULL)
12556 return;
12557 ctxt->instate = XML_PARSER_EOF;
12558 ctxt->disableSAX = 1;
12559 while (ctxt->inputNr > 1)
12560 xmlFreeInputStream(inputPop(ctxt));
12561 if (ctxt->input != NULL) {
12562 /*
12563 * in case there was a specific allocation deallocate before
12564 * overriding base
12565 */
12566 if (ctxt->input->free != NULL) {
12567 ctxt->input->free((xmlChar *) ctxt->input->base);
12568 ctxt->input->free = NULL;
12569 }
12570 if (ctxt->input->buf != NULL) {
12571 xmlFreeParserInputBuffer(ctxt->input->buf);
12572 ctxt->input->buf = NULL;
12573 }
12574 ctxt->input->cur = BAD_CAST"";
12575 ctxt->input->length = 0;
12576 ctxt->input->base = ctxt->input->cur;
12577 ctxt->input->end = ctxt->input->cur;
12578 }
12579 }
12580
12581 /**
12582 * xmlStopParser:
12583 * @ctxt: an XML parser context
12584 *
12585 * Blocks further parser processing
12586 */
12587 void
xmlStopParser(xmlParserCtxtPtr ctxt)12588 xmlStopParser(xmlParserCtxtPtr ctxt) {
12589 if (ctxt == NULL)
12590 return;
12591 xmlHaltParser(ctxt);
12592 ctxt->errNo = XML_ERR_USER_STOP;
12593 }
12594
12595 /**
12596 * xmlCreateIOParserCtxt:
12597 * @sax: a SAX handler
12598 * @user_data: The user data returned on SAX callbacks
12599 * @ioread: an I/O read function
12600 * @ioclose: an I/O close function
12601 * @ioctx: an I/O handler
12602 * @enc: the charset encoding if known
12603 *
12604 * Create a parser context for using the XML parser with an existing
12605 * I/O stream
12606 *
12607 * Returns the new parser context or NULL
12608 */
12609 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12610 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12611 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12612 void *ioctx, xmlCharEncoding enc) {
12613 xmlParserCtxtPtr ctxt;
12614 xmlParserInputPtr inputStream;
12615 xmlParserInputBufferPtr buf;
12616
12617 if (ioread == NULL) return(NULL);
12618
12619 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12620 if (buf == NULL) {
12621 if (ioclose != NULL)
12622 ioclose(ioctx);
12623 return (NULL);
12624 }
12625
12626 ctxt = xmlNewParserCtxt();
12627 if (ctxt == NULL) {
12628 xmlFreeParserInputBuffer(buf);
12629 return(NULL);
12630 }
12631 if (sax != NULL) {
12632 #ifdef LIBXML_SAX1_ENABLED
12633 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12634 #endif /* LIBXML_SAX1_ENABLED */
12635 xmlFree(ctxt->sax);
12636 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12637 if (ctxt->sax == NULL) {
12638 xmlFreeParserInputBuffer(buf);
12639 xmlErrMemory(ctxt, NULL);
12640 xmlFreeParserCtxt(ctxt);
12641 return(NULL);
12642 }
12643 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12644 if (sax->initialized == XML_SAX2_MAGIC)
12645 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12646 else
12647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12648 if (user_data != NULL)
12649 ctxt->userData = user_data;
12650 }
12651
12652 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12653 if (inputStream == NULL) {
12654 xmlFreeParserCtxt(ctxt);
12655 return(NULL);
12656 }
12657 inputPush(ctxt, inputStream);
12658
12659 return(ctxt);
12660 }
12661
12662 #ifdef LIBXML_VALID_ENABLED
12663 /************************************************************************
12664 * *
12665 * Front ends when parsing a DTD *
12666 * *
12667 ************************************************************************/
12668
12669 /**
12670 * xmlIOParseDTD:
12671 * @sax: the SAX handler block or NULL
12672 * @input: an Input Buffer
12673 * @enc: the charset encoding if known
12674 *
12675 * Load and parse a DTD
12676 *
12677 * Returns the resulting xmlDtdPtr or NULL in case of error.
12678 * @input will be freed by the function in any case.
12679 */
12680
12681 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12682 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12683 xmlCharEncoding enc) {
12684 xmlDtdPtr ret = NULL;
12685 xmlParserCtxtPtr ctxt;
12686 xmlParserInputPtr pinput = NULL;
12687 xmlChar start[4];
12688
12689 if (input == NULL)
12690 return(NULL);
12691
12692 ctxt = xmlNewParserCtxt();
12693 if (ctxt == NULL) {
12694 xmlFreeParserInputBuffer(input);
12695 return(NULL);
12696 }
12697
12698 /* We are loading a DTD */
12699 ctxt->options |= XML_PARSE_DTDLOAD;
12700
12701 /*
12702 * Set-up the SAX context
12703 */
12704 if (sax != NULL) {
12705 if (ctxt->sax != NULL)
12706 xmlFree(ctxt->sax);
12707 ctxt->sax = sax;
12708 ctxt->userData = ctxt;
12709 }
12710 xmlDetectSAX2(ctxt);
12711
12712 /*
12713 * generate a parser input from the I/O handler
12714 */
12715
12716 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12717 if (pinput == NULL) {
12718 if (sax != NULL) ctxt->sax = NULL;
12719 xmlFreeParserInputBuffer(input);
12720 xmlFreeParserCtxt(ctxt);
12721 return(NULL);
12722 }
12723
12724 /*
12725 * plug some encoding conversion routines here.
12726 */
12727 if (xmlPushInput(ctxt, pinput) < 0) {
12728 if (sax != NULL) ctxt->sax = NULL;
12729 xmlFreeParserCtxt(ctxt);
12730 return(NULL);
12731 }
12732 if (enc != XML_CHAR_ENCODING_NONE) {
12733 xmlSwitchEncoding(ctxt, enc);
12734 }
12735
12736 pinput->filename = NULL;
12737 pinput->line = 1;
12738 pinput->col = 1;
12739 pinput->base = ctxt->input->cur;
12740 pinput->cur = ctxt->input->cur;
12741 pinput->free = NULL;
12742
12743 /*
12744 * let's parse that entity knowing it's an external subset.
12745 */
12746 ctxt->inSubset = 2;
12747 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12748 if (ctxt->myDoc == NULL) {
12749 xmlErrMemory(ctxt, "New Doc failed");
12750 return(NULL);
12751 }
12752 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12753 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12754 BAD_CAST "none", BAD_CAST "none");
12755
12756 if ((enc == XML_CHAR_ENCODING_NONE) &&
12757 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12758 /*
12759 * Get the 4 first bytes and decode the charset
12760 * if enc != XML_CHAR_ENCODING_NONE
12761 * plug some encoding conversion routines.
12762 */
12763 start[0] = RAW;
12764 start[1] = NXT(1);
12765 start[2] = NXT(2);
12766 start[3] = NXT(3);
12767 enc = xmlDetectCharEncoding(start, 4);
12768 if (enc != XML_CHAR_ENCODING_NONE) {
12769 xmlSwitchEncoding(ctxt, enc);
12770 }
12771 }
12772
12773 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12774
12775 if (ctxt->myDoc != NULL) {
12776 if (ctxt->wellFormed) {
12777 ret = ctxt->myDoc->extSubset;
12778 ctxt->myDoc->extSubset = NULL;
12779 if (ret != NULL) {
12780 xmlNodePtr tmp;
12781
12782 ret->doc = NULL;
12783 tmp = ret->children;
12784 while (tmp != NULL) {
12785 tmp->doc = NULL;
12786 tmp = tmp->next;
12787 }
12788 }
12789 } else {
12790 ret = NULL;
12791 }
12792 xmlFreeDoc(ctxt->myDoc);
12793 ctxt->myDoc = NULL;
12794 }
12795 if (sax != NULL) ctxt->sax = NULL;
12796 xmlFreeParserCtxt(ctxt);
12797
12798 return(ret);
12799 }
12800
12801 /**
12802 * xmlSAXParseDTD:
12803 * @sax: the SAX handler block
12804 * @ExternalID: a NAME* containing the External ID of the DTD
12805 * @SystemID: a NAME* containing the URL to the DTD
12806 *
12807 * Load and parse an external subset.
12808 *
12809 * Returns the resulting xmlDtdPtr or NULL in case of error.
12810 */
12811
12812 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12813 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12814 const xmlChar *SystemID) {
12815 xmlDtdPtr ret = NULL;
12816 xmlParserCtxtPtr ctxt;
12817 xmlParserInputPtr input = NULL;
12818 xmlCharEncoding enc;
12819 xmlChar* systemIdCanonic;
12820
12821 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12822
12823 ctxt = xmlNewParserCtxt();
12824 if (ctxt == NULL) {
12825 return(NULL);
12826 }
12827
12828 /* We are loading a DTD */
12829 ctxt->options |= XML_PARSE_DTDLOAD;
12830
12831 /*
12832 * Set-up the SAX context
12833 */
12834 if (sax != NULL) {
12835 if (ctxt->sax != NULL)
12836 xmlFree(ctxt->sax);
12837 ctxt->sax = sax;
12838 ctxt->userData = ctxt;
12839 }
12840
12841 /*
12842 * Canonicalise the system ID
12843 */
12844 systemIdCanonic = xmlCanonicPath(SystemID);
12845 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12846 xmlFreeParserCtxt(ctxt);
12847 return(NULL);
12848 }
12849
12850 /*
12851 * Ask the Entity resolver to load the damn thing
12852 */
12853
12854 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12855 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12856 systemIdCanonic);
12857 if (input == NULL) {
12858 if (sax != NULL) ctxt->sax = NULL;
12859 xmlFreeParserCtxt(ctxt);
12860 if (systemIdCanonic != NULL)
12861 xmlFree(systemIdCanonic);
12862 return(NULL);
12863 }
12864
12865 /*
12866 * plug some encoding conversion routines here.
12867 */
12868 if (xmlPushInput(ctxt, input) < 0) {
12869 if (sax != NULL) ctxt->sax = NULL;
12870 xmlFreeParserCtxt(ctxt);
12871 if (systemIdCanonic != NULL)
12872 xmlFree(systemIdCanonic);
12873 return(NULL);
12874 }
12875 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12876 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12877 xmlSwitchEncoding(ctxt, enc);
12878 }
12879
12880 if (input->filename == NULL)
12881 input->filename = (char *) systemIdCanonic;
12882 else
12883 xmlFree(systemIdCanonic);
12884 input->line = 1;
12885 input->col = 1;
12886 input->base = ctxt->input->cur;
12887 input->cur = ctxt->input->cur;
12888 input->free = NULL;
12889
12890 /*
12891 * let's parse that entity knowing it's an external subset.
12892 */
12893 ctxt->inSubset = 2;
12894 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12895 if (ctxt->myDoc == NULL) {
12896 xmlErrMemory(ctxt, "New Doc failed");
12897 if (sax != NULL) ctxt->sax = NULL;
12898 xmlFreeParserCtxt(ctxt);
12899 return(NULL);
12900 }
12901 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12902 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12903 ExternalID, SystemID);
12904 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12905
12906 if (ctxt->myDoc != NULL) {
12907 if (ctxt->wellFormed) {
12908 ret = ctxt->myDoc->extSubset;
12909 ctxt->myDoc->extSubset = NULL;
12910 if (ret != NULL) {
12911 xmlNodePtr tmp;
12912
12913 ret->doc = NULL;
12914 tmp = ret->children;
12915 while (tmp != NULL) {
12916 tmp->doc = NULL;
12917 tmp = tmp->next;
12918 }
12919 }
12920 } else {
12921 ret = NULL;
12922 }
12923 xmlFreeDoc(ctxt->myDoc);
12924 ctxt->myDoc = NULL;
12925 }
12926 if (sax != NULL) ctxt->sax = NULL;
12927 xmlFreeParserCtxt(ctxt);
12928
12929 return(ret);
12930 }
12931
12932
12933 /**
12934 * xmlParseDTD:
12935 * @ExternalID: a NAME* containing the External ID of the DTD
12936 * @SystemID: a NAME* containing the URL to the DTD
12937 *
12938 * Load and parse an external subset.
12939 *
12940 * Returns the resulting xmlDtdPtr or NULL in case of error.
12941 */
12942
12943 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12944 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12945 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12946 }
12947 #endif /* LIBXML_VALID_ENABLED */
12948
12949 /************************************************************************
12950 * *
12951 * Front ends when parsing an Entity *
12952 * *
12953 ************************************************************************/
12954
12955 /**
12956 * xmlParseCtxtExternalEntity:
12957 * @ctx: the existing parsing context
12958 * @URL: the URL for the entity to load
12959 * @ID: the System ID for the entity to load
12960 * @lst: the return value for the set of parsed nodes
12961 *
12962 * Parse an external general entity within an existing parsing context
12963 * An external general parsed entity is well-formed if it matches the
12964 * production labeled extParsedEnt.
12965 *
12966 * [78] extParsedEnt ::= TextDecl? content
12967 *
12968 * Returns 0 if the entity is well formed, -1 in case of args problem and
12969 * the parser error code otherwise
12970 */
12971
12972 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12973 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12974 const xmlChar *ID, xmlNodePtr *lst) {
12975 void *userData;
12976
12977 if (ctx == NULL) return(-1);
12978 /*
12979 * If the user provided their own SAX callbacks, then reuse the
12980 * userData callback field, otherwise the expected setup in a
12981 * DOM builder is to have userData == ctxt
12982 */
12983 if (ctx->userData == ctx)
12984 userData = NULL;
12985 else
12986 userData = ctx->userData;
12987 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12988 userData, ctx->depth + 1,
12989 URL, ID, lst);
12990 }
12991
12992 /**
12993 * xmlParseExternalEntityPrivate:
12994 * @doc: the document the chunk pertains to
12995 * @oldctxt: the previous parser context if available
12996 * @sax: the SAX handler block (possibly NULL)
12997 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12998 * @depth: Used for loop detection, use 0
12999 * @URL: the URL for the entity to load
13000 * @ID: the System ID for the entity to load
13001 * @list: the return value for the set of parsed nodes
13002 *
13003 * Private version of xmlParseExternalEntity()
13004 *
13005 * Returns 0 if the entity is well formed, -1 in case of args problem and
13006 * the parser error code otherwise
13007 */
13008
13009 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13010 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13011 xmlSAXHandlerPtr sax,
13012 void *user_data, int depth, const xmlChar *URL,
13013 const xmlChar *ID, xmlNodePtr *list) {
13014 xmlParserCtxtPtr ctxt;
13015 xmlDocPtr newDoc;
13016 xmlNodePtr newRoot;
13017 xmlSAXHandlerPtr oldsax = NULL;
13018 xmlParserErrors ret = XML_ERR_OK;
13019 xmlChar start[4];
13020 xmlCharEncoding enc;
13021
13022 if (((depth > 40) &&
13023 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13024 (depth > 1024)) {
13025 return(XML_ERR_ENTITY_LOOP);
13026 }
13027
13028 if (list != NULL)
13029 *list = NULL;
13030 if ((URL == NULL) && (ID == NULL))
13031 return(XML_ERR_INTERNAL_ERROR);
13032 if (doc == NULL)
13033 return(XML_ERR_INTERNAL_ERROR);
13034
13035
13036 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13037 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13038 ctxt->userData = ctxt;
13039 if (sax != NULL) {
13040 oldsax = ctxt->sax;
13041 ctxt->sax = sax;
13042 if (user_data != NULL)
13043 ctxt->userData = user_data;
13044 }
13045 xmlDetectSAX2(ctxt);
13046 newDoc = xmlNewDoc(BAD_CAST "1.0");
13047 if (newDoc == NULL) {
13048 xmlFreeParserCtxt(ctxt);
13049 return(XML_ERR_INTERNAL_ERROR);
13050 }
13051 newDoc->properties = XML_DOC_INTERNAL;
13052 if (doc) {
13053 newDoc->intSubset = doc->intSubset;
13054 newDoc->extSubset = doc->extSubset;
13055 if (doc->dict) {
13056 newDoc->dict = doc->dict;
13057 xmlDictReference(newDoc->dict);
13058 }
13059 if (doc->URL != NULL) {
13060 newDoc->URL = xmlStrdup(doc->URL);
13061 }
13062 }
13063 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13064 if (newRoot == NULL) {
13065 if (sax != NULL)
13066 ctxt->sax = oldsax;
13067 xmlFreeParserCtxt(ctxt);
13068 newDoc->intSubset = NULL;
13069 newDoc->extSubset = NULL;
13070 xmlFreeDoc(newDoc);
13071 return(XML_ERR_INTERNAL_ERROR);
13072 }
13073 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13074 nodePush(ctxt, newDoc->children);
13075 if (doc == NULL) {
13076 ctxt->myDoc = newDoc;
13077 } else {
13078 ctxt->myDoc = doc;
13079 newRoot->doc = doc;
13080 }
13081
13082 /*
13083 * Get the 4 first bytes and decode the charset
13084 * if enc != XML_CHAR_ENCODING_NONE
13085 * plug some encoding conversion routines.
13086 */
13087 GROW;
13088 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13089 start[0] = RAW;
13090 start[1] = NXT(1);
13091 start[2] = NXT(2);
13092 start[3] = NXT(3);
13093 enc = xmlDetectCharEncoding(start, 4);
13094 if (enc != XML_CHAR_ENCODING_NONE) {
13095 xmlSwitchEncoding(ctxt, enc);
13096 }
13097 }
13098
13099 /*
13100 * Parse a possible text declaration first
13101 */
13102 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13103 xmlParseTextDecl(ctxt);
13104 /*
13105 * An XML-1.0 document can't reference an entity not XML-1.0
13106 */
13107 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13108 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13109 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13110 "Version mismatch between document and entity\n");
13111 }
13112 }
13113
13114 ctxt->instate = XML_PARSER_CONTENT;
13115 ctxt->depth = depth;
13116 if (oldctxt != NULL) {
13117 ctxt->_private = oldctxt->_private;
13118 ctxt->loadsubset = oldctxt->loadsubset;
13119 ctxt->validate = oldctxt->validate;
13120 ctxt->valid = oldctxt->valid;
13121 ctxt->replaceEntities = oldctxt->replaceEntities;
13122 if (oldctxt->validate) {
13123 ctxt->vctxt.error = oldctxt->vctxt.error;
13124 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13125 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13126 }
13127 ctxt->external = oldctxt->external;
13128 if (ctxt->dict) xmlDictFree(ctxt->dict);
13129 ctxt->dict = oldctxt->dict;
13130 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13131 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13132 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13133 ctxt->dictNames = oldctxt->dictNames;
13134 ctxt->attsDefault = oldctxt->attsDefault;
13135 ctxt->attsSpecial = oldctxt->attsSpecial;
13136 ctxt->linenumbers = oldctxt->linenumbers;
13137 ctxt->record_info = oldctxt->record_info;
13138 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13139 ctxt->node_seq.length = oldctxt->node_seq.length;
13140 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13141 } else {
13142 /*
13143 * Doing validity checking on chunk without context
13144 * doesn't make sense
13145 */
13146 ctxt->_private = NULL;
13147 ctxt->validate = 0;
13148 ctxt->external = 2;
13149 ctxt->loadsubset = 0;
13150 }
13151
13152 xmlParseContent(ctxt);
13153
13154 if ((RAW == '<') && (NXT(1) == '/')) {
13155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156 } else if (RAW != 0) {
13157 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13158 }
13159 if (ctxt->node != newDoc->children) {
13160 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13161 }
13162
13163 if (!ctxt->wellFormed) {
13164 if (ctxt->errNo == 0)
13165 ret = XML_ERR_INTERNAL_ERROR;
13166 else
13167 ret = (xmlParserErrors)ctxt->errNo;
13168 } else {
13169 if (list != NULL) {
13170 xmlNodePtr cur;
13171
13172 /*
13173 * Return the newly created nodeset after unlinking it from
13174 * they pseudo parent.
13175 */
13176 cur = newDoc->children->children;
13177 *list = cur;
13178 while (cur != NULL) {
13179 cur->parent = NULL;
13180 cur = cur->next;
13181 }
13182 newDoc->children->children = NULL;
13183 }
13184 ret = XML_ERR_OK;
13185 }
13186
13187 /*
13188 * Record in the parent context the number of entities replacement
13189 * done when parsing that reference.
13190 */
13191 if (oldctxt != NULL)
13192 oldctxt->nbentities += ctxt->nbentities;
13193
13194 /*
13195 * Also record the size of the entity parsed
13196 */
13197 if (ctxt->input != NULL && oldctxt != NULL) {
13198 oldctxt->sizeentities += ctxt->input->consumed;
13199 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13200 }
13201 /*
13202 * And record the last error if any
13203 */
13204 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13205 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13206
13207 if (sax != NULL)
13208 ctxt->sax = oldsax;
13209 if (oldctxt != NULL) {
13210 ctxt->dict = NULL;
13211 ctxt->attsDefault = NULL;
13212 ctxt->attsSpecial = NULL;
13213 oldctxt->validate = ctxt->validate;
13214 oldctxt->valid = ctxt->valid;
13215 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13216 oldctxt->node_seq.length = ctxt->node_seq.length;
13217 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13218 }
13219 ctxt->node_seq.maximum = 0;
13220 ctxt->node_seq.length = 0;
13221 ctxt->node_seq.buffer = NULL;
13222 xmlFreeParserCtxt(ctxt);
13223 newDoc->intSubset = NULL;
13224 newDoc->extSubset = NULL;
13225 xmlFreeDoc(newDoc);
13226
13227 return(ret);
13228 }
13229
13230 #ifdef LIBXML_SAX1_ENABLED
13231 /**
13232 * xmlParseExternalEntity:
13233 * @doc: the document the chunk pertains to
13234 * @sax: the SAX handler block (possibly NULL)
13235 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13236 * @depth: Used for loop detection, use 0
13237 * @URL: the URL for the entity to load
13238 * @ID: the System ID for the entity to load
13239 * @lst: the return value for the set of parsed nodes
13240 *
13241 * Parse an external general entity
13242 * An external general parsed entity is well-formed if it matches the
13243 * production labeled extParsedEnt.
13244 *
13245 * [78] extParsedEnt ::= TextDecl? content
13246 *
13247 * Returns 0 if the entity is well formed, -1 in case of args problem and
13248 * the parser error code otherwise
13249 */
13250
13251 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13252 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13253 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13254 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13255 ID, lst));
13256 }
13257
13258 /**
13259 * xmlParseBalancedChunkMemory:
13260 * @doc: the document the chunk pertains to (must not be NULL)
13261 * @sax: the SAX handler block (possibly NULL)
13262 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13263 * @depth: Used for loop detection, use 0
13264 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13265 * @lst: the return value for the set of parsed nodes
13266 *
13267 * Parse a well-balanced chunk of an XML document
13268 * called by the parser
13269 * The allowed sequence for the Well Balanced Chunk is the one defined by
13270 * the content production in the XML grammar:
13271 *
13272 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13273 *
13274 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13275 * the parser error code otherwise
13276 */
13277
13278 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13279 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13280 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13281 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13282 depth, string, lst, 0 );
13283 }
13284 #endif /* LIBXML_SAX1_ENABLED */
13285
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context (dereferenced unconditionally,
 *            must not be NULL)
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context; when NULL the
 *              new context itself is used as user data
 * @lst:  the return value for the set of parsed nodes (possibly NULL)
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed by a temporary memory parser context which borrows
 * the dictionary, SAX handler, options and default attribute tables of
 * @oldctxt.  The nodes are built under a temporary "pseudoroot" element,
 * either in @oldctxt's document (whose own children are set aside and
 * restored afterwards) or in a throw-away document when @oldctxt has none.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise
 *
 * The node list is only returned through @lst when the result is
 * XML_ERR_OK; otherwise *lst is left NULL and the parsed nodes are freed.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* Nesting limit: 40 levels, or 1024 with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
	return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
	ctxt->userData = user_data;
    else
	ctxt->userData = ctxt;
    /*
     * Replace the fresh dictionary by the parent's one.  No reference is
     * taken here, which is why ctxt->dict is reset to NULL before every
     * xmlFreeParserCtxt() call below.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* Borrow the parent's SAX block; oldsax is put back before freeing */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* No document yet: build the nodes in a temporary one */
	newDoc = xmlNewDoc(BAD_CAST "1.0");
	if (newDoc == NULL) {
	    ctxt->sax = oldsax;
	    ctxt->dict = NULL;
	    xmlFreeParserCtxt(ctxt);
	    return(XML_ERR_INTERNAL_ERROR);
	}
	newDoc->properties = XML_DOC_INTERNAL;
	newDoc->dict = ctxt->dict;
	xmlDictReference(newDoc->dict);
	ctxt->myDoc = newDoc;
    } else {
        /*
         * Reuse the parent's document; remember its child list so that it
         * can be restored once the pseudoroot has been removed again.
         */
	ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
	last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
	ctxt->sax = oldsax;
	ctxt->dict = NULL;
	xmlFreeParserCtxt(ctxt);
	if (newDoc != NULL) {
	    xmlFreeDoc(newDoc);
	}
	return(XML_ERR_INTERNAL_ERROR);
    }
    /* Make the pseudoroot the only child of the document while parsing */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
	/*
	 * ID/IDREF registration will be done in xmlValidateElement below
	 */
	ctxt->loadsubset |= XML_SKIP_IDS;
    }
    /* Borrowed tables, detached again before ctxt is freed */
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    if (ctxt->node != ctxt->myDoc->children) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
	    ret = XML_ERR_INTERNAL_ERROR;
	else
	    ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
	xmlNodePtr cur;

	/*
	 * Return the newly created nodeset after unlinking it from
	 * its pseudo parent.
	 */
	cur = ctxt->myDoc->children->children;
	*lst = cur;
	while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
		(cur->type == XML_ELEMENT_NODE)) {
		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
			oldctxt->myDoc, cur);
	    }
#endif /* LIBXML_VALID_ENABLED */
	    cur->parent = NULL;
	    cur = cur->next;
	}
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /*
         * Drop the pseudoroot (with any nodes that were not handed over
         * through lst) and put the saved child list back.
         */
	xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* Detach everything borrowed from oldctxt before freeing ctxt */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
	xmlFreeDoc(newDoc);
    }

    return(ret);
}
13480
13481 /**
13482 * xmlParseInNodeContext:
13483 * @node: the context node
13484 * @data: the input string
13485 * @datalen: the input string length in bytes
13486 * @options: a combination of xmlParserOption
13487 * @lst: the return value for the set of parsed nodes
13488 *
13489 * Parse a well-balanced chunk of an XML document
13490 * within the context (DTD, namespaces, etc ...) of the given node.
13491 *
13492 * The allowed sequence for the data is a Well Balanced Chunk defined by
13493 * the content production in the XML grammar:
13494 *
13495 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13496 *
13497 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13498 * error code otherwise
13499 */
13500 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13501 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13502 int options, xmlNodePtr *lst) {
13503 #ifdef SAX2
13504 xmlParserCtxtPtr ctxt;
13505 xmlDocPtr doc = NULL;
13506 xmlNodePtr fake, cur;
13507 int nsnr = 0;
13508
13509 xmlParserErrors ret = XML_ERR_OK;
13510
13511 /*
13512 * check all input parameters, grab the document
13513 */
13514 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13515 return(XML_ERR_INTERNAL_ERROR);
13516 switch (node->type) {
13517 case XML_ELEMENT_NODE:
13518 case XML_ATTRIBUTE_NODE:
13519 case XML_TEXT_NODE:
13520 case XML_CDATA_SECTION_NODE:
13521 case XML_ENTITY_REF_NODE:
13522 case XML_PI_NODE:
13523 case XML_COMMENT_NODE:
13524 case XML_DOCUMENT_NODE:
13525 case XML_HTML_DOCUMENT_NODE:
13526 break;
13527 default:
13528 return(XML_ERR_INTERNAL_ERROR);
13529
13530 }
13531 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13532 (node->type != XML_DOCUMENT_NODE) &&
13533 (node->type != XML_HTML_DOCUMENT_NODE))
13534 node = node->parent;
13535 if (node == NULL)
13536 return(XML_ERR_INTERNAL_ERROR);
13537 if (node->type == XML_ELEMENT_NODE)
13538 doc = node->doc;
13539 else
13540 doc = (xmlDocPtr) node;
13541 if (doc == NULL)
13542 return(XML_ERR_INTERNAL_ERROR);
13543
13544 /*
13545 * allocate a context and set-up everything not related to the
13546 * node position in the tree
13547 */
13548 if (doc->type == XML_DOCUMENT_NODE)
13549 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13550 #ifdef LIBXML_HTML_ENABLED
13551 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13552 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13553 /*
13554 * When parsing in context, it makes no sense to add implied
13555 * elements like html/body/etc...
13556 */
13557 options |= HTML_PARSE_NOIMPLIED;
13558 }
13559 #endif
13560 else
13561 return(XML_ERR_INTERNAL_ERROR);
13562
13563 if (ctxt == NULL)
13564 return(XML_ERR_NO_MEMORY);
13565
13566 /*
13567 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13568 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13569 * we must wait until the last moment to free the original one.
13570 */
13571 if (doc->dict != NULL) {
13572 if (ctxt->dict != NULL)
13573 xmlDictFree(ctxt->dict);
13574 ctxt->dict = doc->dict;
13575 } else
13576 options |= XML_PARSE_NODICT;
13577
13578 if (doc->encoding != NULL) {
13579 xmlCharEncodingHandlerPtr hdlr;
13580
13581 if (ctxt->encoding != NULL)
13582 xmlFree((xmlChar *) ctxt->encoding);
13583 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13584
13585 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13586 if (hdlr != NULL) {
13587 xmlSwitchToEncoding(ctxt, hdlr);
13588 } else {
13589 return(XML_ERR_UNSUPPORTED_ENCODING);
13590 }
13591 }
13592
13593 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13594 xmlDetectSAX2(ctxt);
13595 ctxt->myDoc = doc;
13596 /* parsing in context, i.e. as within existing content */
13597 ctxt->input_id = 2;
13598 ctxt->instate = XML_PARSER_CONTENT;
13599
13600 fake = xmlNewComment(NULL);
13601 if (fake == NULL) {
13602 xmlFreeParserCtxt(ctxt);
13603 return(XML_ERR_NO_MEMORY);
13604 }
13605 xmlAddChild(node, fake);
13606
13607 if (node->type == XML_ELEMENT_NODE) {
13608 nodePush(ctxt, node);
13609 /*
13610 * initialize the SAX2 namespaces stack
13611 */
13612 cur = node;
13613 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13614 xmlNsPtr ns = cur->nsDef;
13615 const xmlChar *iprefix, *ihref;
13616
13617 while (ns != NULL) {
13618 if (ctxt->dict) {
13619 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13620 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13621 } else {
13622 iprefix = ns->prefix;
13623 ihref = ns->href;
13624 }
13625
13626 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13627 nsPush(ctxt, iprefix, ihref);
13628 nsnr++;
13629 }
13630 ns = ns->next;
13631 }
13632 cur = cur->parent;
13633 }
13634 }
13635
13636 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13637 /*
13638 * ID/IDREF registration will be done in xmlValidateElement below
13639 */
13640 ctxt->loadsubset |= XML_SKIP_IDS;
13641 }
13642
13643 #ifdef LIBXML_HTML_ENABLED
13644 if (doc->type == XML_HTML_DOCUMENT_NODE)
13645 __htmlParseContent(ctxt);
13646 else
13647 #endif
13648 xmlParseContent(ctxt);
13649
13650 nsPop(ctxt, nsnr);
13651 if ((RAW == '<') && (NXT(1) == '/')) {
13652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653 } else if (RAW != 0) {
13654 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13655 }
13656 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13657 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13658 ctxt->wellFormed = 0;
13659 }
13660
13661 if (!ctxt->wellFormed) {
13662 if (ctxt->errNo == 0)
13663 ret = XML_ERR_INTERNAL_ERROR;
13664 else
13665 ret = (xmlParserErrors)ctxt->errNo;
13666 } else {
13667 ret = XML_ERR_OK;
13668 }
13669
13670 /*
13671 * Return the newly created nodeset after unlinking it from
13672 * the pseudo sibling.
13673 */
13674
13675 cur = fake->next;
13676 fake->next = NULL;
13677 node->last = fake;
13678
13679 if (cur != NULL) {
13680 cur->prev = NULL;
13681 }
13682
13683 *lst = cur;
13684
13685 while (cur != NULL) {
13686 cur->parent = NULL;
13687 cur = cur->next;
13688 }
13689
13690 xmlUnlinkNode(fake);
13691 xmlFreeNode(fake);
13692
13693
13694 if (ret != XML_ERR_OK) {
13695 xmlFreeNodeList(*lst);
13696 *lst = NULL;
13697 }
13698
13699 if (doc->dict != NULL)
13700 ctxt->dict = NULL;
13701 xmlFreeParserCtxt(ctxt);
13702
13703 return(ret);
13704 #else /* !SAX2 */
13705 return(XML_ERR_INTERNAL_ERROR);
13706 #endif
13707 }
13708
13709 #ifdef LIBXML_SAX1_ENABLED
13710 /**
13711 * xmlParseBalancedChunkMemoryRecover:
13712 * @doc: the document the chunk pertains to (must not be NULL)
13713 * @sax: the SAX handler block (possibly NULL)
13714 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13715 * @depth: Used for loop detection, use 0
13716 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13717 * @lst: the return value for the set of parsed nodes
13718 * @recover: return nodes even if the data is broken (use 0)
13719 *
13720 *
13721 * Parse a well-balanced chunk of an XML document
13722 * called by the parser
13723 * The allowed sequence for the Well Balanced Chunk is the one defined by
13724 * the content production in the XML grammar:
13725 *
13726 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13727 *
13728 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13729 * the parser error code otherwise
13730 *
13731 * In case recover is set to 1, the nodelist will not be empty even if
13732 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13733 * some extent.
13734 */
13735 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13736 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13737 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13738 int recover) {
13739 xmlParserCtxtPtr ctxt;
13740 xmlDocPtr newDoc;
13741 xmlSAXHandlerPtr oldsax = NULL;
13742 xmlNodePtr content, newRoot;
13743 int size;
13744 int ret = 0;
13745
13746 if (depth > 40) {
13747 return(XML_ERR_ENTITY_LOOP);
13748 }
13749
13750
13751 if (lst != NULL)
13752 *lst = NULL;
13753 if (string == NULL)
13754 return(-1);
13755
13756 size = xmlStrlen(string);
13757
13758 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13759 if (ctxt == NULL) return(-1);
13760 ctxt->userData = ctxt;
13761 if (sax != NULL) {
13762 oldsax = ctxt->sax;
13763 ctxt->sax = sax;
13764 if (user_data != NULL)
13765 ctxt->userData = user_data;
13766 }
13767 newDoc = xmlNewDoc(BAD_CAST "1.0");
13768 if (newDoc == NULL) {
13769 xmlFreeParserCtxt(ctxt);
13770 return(-1);
13771 }
13772 newDoc->properties = XML_DOC_INTERNAL;
13773 if ((doc != NULL) && (doc->dict != NULL)) {
13774 xmlDictFree(ctxt->dict);
13775 ctxt->dict = doc->dict;
13776 xmlDictReference(ctxt->dict);
13777 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13778 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13779 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13780 ctxt->dictNames = 1;
13781 } else {
13782 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13783 }
13784 /* doc == NULL is only supported for historic reasons */
13785 if (doc != NULL) {
13786 newDoc->intSubset = doc->intSubset;
13787 newDoc->extSubset = doc->extSubset;
13788 }
13789 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13790 if (newRoot == NULL) {
13791 if (sax != NULL)
13792 ctxt->sax = oldsax;
13793 xmlFreeParserCtxt(ctxt);
13794 newDoc->intSubset = NULL;
13795 newDoc->extSubset = NULL;
13796 xmlFreeDoc(newDoc);
13797 return(-1);
13798 }
13799 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13800 nodePush(ctxt, newRoot);
13801 /* doc == NULL is only supported for historic reasons */
13802 if (doc == NULL) {
13803 ctxt->myDoc = newDoc;
13804 } else {
13805 ctxt->myDoc = newDoc;
13806 newDoc->children->doc = doc;
13807 /* Ensure that doc has XML spec namespace */
13808 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13809 newDoc->oldNs = doc->oldNs;
13810 }
13811 ctxt->instate = XML_PARSER_CONTENT;
13812 ctxt->input_id = 2;
13813 ctxt->depth = depth;
13814
13815 /*
13816 * Doing validity checking on chunk doesn't make sense
13817 */
13818 ctxt->validate = 0;
13819 ctxt->loadsubset = 0;
13820 xmlDetectSAX2(ctxt);
13821
13822 if ( doc != NULL ){
13823 content = doc->children;
13824 doc->children = NULL;
13825 xmlParseContent(ctxt);
13826 doc->children = content;
13827 }
13828 else {
13829 xmlParseContent(ctxt);
13830 }
13831 if ((RAW == '<') && (NXT(1) == '/')) {
13832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13833 } else if (RAW != 0) {
13834 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13835 }
13836 if (ctxt->node != newDoc->children) {
13837 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13838 }
13839
13840 if (!ctxt->wellFormed) {
13841 if (ctxt->errNo == 0)
13842 ret = 1;
13843 else
13844 ret = ctxt->errNo;
13845 } else {
13846 ret = 0;
13847 }
13848
13849 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13850 xmlNodePtr cur;
13851
13852 /*
13853 * Return the newly created nodeset after unlinking it from
13854 * they pseudo parent.
13855 */
13856 cur = newDoc->children->children;
13857 *lst = cur;
13858 while (cur != NULL) {
13859 xmlSetTreeDoc(cur, doc);
13860 cur->parent = NULL;
13861 cur = cur->next;
13862 }
13863 newDoc->children->children = NULL;
13864 }
13865
13866 if (sax != NULL)
13867 ctxt->sax = oldsax;
13868 xmlFreeParserCtxt(ctxt);
13869 newDoc->intSubset = NULL;
13870 newDoc->extSubset = NULL;
13871 /* This leaks the namespace list if doc == NULL */
13872 newDoc->oldNs = NULL;
13873 xmlFreeDoc(newDoc);
13874
13875 return(ret);
13876 }
13877
13878 /**
13879 * xmlSAXParseEntity:
13880 * @sax: the SAX handler block
13881 * @filename: the filename
13882 *
13883 * parse an XML external entity out of context and build a tree.
13884 * It use the given SAX function block to handle the parsing callback.
13885 * If sax is NULL, fallback to the default DOM tree building routines.
13886 *
13887 * [78] extParsedEnt ::= TextDecl? content
13888 *
13889 * This correspond to a "Well Balanced" chunk
13890 *
13891 * Returns the resulting document tree
13892 */
13893
13894 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13895 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13896 xmlDocPtr ret;
13897 xmlParserCtxtPtr ctxt;
13898
13899 ctxt = xmlCreateFileParserCtxt(filename);
13900 if (ctxt == NULL) {
13901 return(NULL);
13902 }
13903 if (sax != NULL) {
13904 if (ctxt->sax != NULL)
13905 xmlFree(ctxt->sax);
13906 ctxt->sax = sax;
13907 ctxt->userData = NULL;
13908 }
13909
13910 xmlParseExtParsedEnt(ctxt);
13911
13912 if (ctxt->wellFormed)
13913 ret = ctxt->myDoc;
13914 else {
13915 ret = NULL;
13916 xmlFreeDoc(ctxt->myDoc);
13917 ctxt->myDoc = NULL;
13918 }
13919 if (sax != NULL)
13920 ctxt->sax = NULL;
13921 xmlFreeParserCtxt(ctxt);
13922
13923 return(ret);
13924 }
13925
13926 /**
13927 * xmlParseEntity:
13928 * @filename: the filename
13929 *
13930 * parse an XML external entity out of context and build a tree.
13931 *
13932 * [78] extParsedEnt ::= TextDecl? content
13933 *
13934 * This correspond to a "Well Balanced" chunk
13935 *
13936 * Returns the resulting document tree
13937 */
13938
13939 xmlDocPtr
xmlParseEntity(const char * filename)13940 xmlParseEntity(const char *filename) {
13941 return(xmlSAXParseEntity(NULL, filename));
13942 }
13943 #endif /* LIBXML_SAX1_ENABLED */
13944
13945 /**
13946 * xmlCreateEntityParserCtxtInternal:
13947 * @URL: the entity URL
13948 * @ID: the entity PUBLIC ID
13949 * @base: a possible base for the target URI
13950 * @pctx: parser context used to set options on new context
13951 *
13952 * Create a parser context for an external entity
13953 * Automatic support for ZLIB/Compress compressed document is provided
13954 * by default if found at compile-time.
13955 *
13956 * Returns the new parser context or NULL
13957 */
13958 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13959 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13960 const xmlChar *base, xmlParserCtxtPtr pctx) {
13961 xmlParserCtxtPtr ctxt;
13962 xmlParserInputPtr inputStream;
13963 char *directory = NULL;
13964 xmlChar *uri;
13965
13966 ctxt = xmlNewParserCtxt();
13967 if (ctxt == NULL) {
13968 return(NULL);
13969 }
13970
13971 if (pctx != NULL) {
13972 ctxt->options = pctx->options;
13973 ctxt->_private = pctx->_private;
13974 /*
13975 * this is a subparser of pctx, so the input_id should be
13976 * incremented to distinguish from main entity
13977 */
13978 ctxt->input_id = pctx->input_id + 1;
13979 }
13980
13981 /* Don't read from stdin. */
13982 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13983 URL = BAD_CAST "./-";
13984
13985 uri = xmlBuildURI(URL, base);
13986
13987 if (uri == NULL) {
13988 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13989 if (inputStream == NULL) {
13990 xmlFreeParserCtxt(ctxt);
13991 return(NULL);
13992 }
13993
13994 inputPush(ctxt, inputStream);
13995
13996 if ((ctxt->directory == NULL) && (directory == NULL))
13997 directory = xmlParserGetDirectory((char *)URL);
13998 if ((ctxt->directory == NULL) && (directory != NULL))
13999 ctxt->directory = directory;
14000 } else {
14001 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14002 if (inputStream == NULL) {
14003 xmlFree(uri);
14004 xmlFreeParserCtxt(ctxt);
14005 return(NULL);
14006 }
14007
14008 inputPush(ctxt, inputStream);
14009
14010 if ((ctxt->directory == NULL) && (directory == NULL))
14011 directory = xmlParserGetDirectory((char *)uri);
14012 if ((ctxt->directory == NULL) && (directory != NULL))
14013 ctxt->directory = directory;
14014 xmlFree(uri);
14015 }
14016 return(ctxt);
14017 }
14018
14019 /**
14020 * xmlCreateEntityParserCtxt:
14021 * @URL: the entity URL
14022 * @ID: the entity PUBLIC ID
14023 * @base: a possible base for the target URI
14024 *
14025 * Create a parser context for an external entity
14026 * Automatic support for ZLIB/Compress compressed document is provided
14027 * by default if found at compile-time.
14028 *
14029 * Returns the new parser context or NULL
14030 */
14031 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14032 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14033 const xmlChar *base) {
14034 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14035
14036 }
14037
14038 /************************************************************************
14039 * *
14040 * Front ends when parsing from a file *
14041 * *
14042 ************************************************************************/
14043
14044 /**
14045 * xmlCreateURLParserCtxt:
14046 * @filename: the filename or URL
14047 * @options: a combination of xmlParserOption
14048 *
14049 * Create a parser context for a file or URL content.
14050 * Automatic support for ZLIB/Compress compressed document is provided
14051 * by default if found at compile-time and for file accesses
14052 *
14053 * Returns the new parser context or NULL
14054 */
14055 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14056 xmlCreateURLParserCtxt(const char *filename, int options)
14057 {
14058 xmlParserCtxtPtr ctxt;
14059 xmlParserInputPtr inputStream;
14060 char *directory = NULL;
14061
14062 ctxt = xmlNewParserCtxt();
14063 if (ctxt == NULL) {
14064 xmlErrMemory(NULL, "cannot allocate parser context");
14065 return(NULL);
14066 }
14067
14068 if (options)
14069 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14070 ctxt->linenumbers = 1;
14071
14072 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14073 if (inputStream == NULL) {
14074 xmlFreeParserCtxt(ctxt);
14075 return(NULL);
14076 }
14077
14078 inputPush(ctxt, inputStream);
14079 if ((ctxt->directory == NULL) && (directory == NULL))
14080 directory = xmlParserGetDirectory(filename);
14081 if ((ctxt->directory == NULL) && (directory != NULL))
14082 ctxt->directory = directory;
14083
14084 return(ctxt);
14085 }
14086
14087 /**
14088 * xmlCreateFileParserCtxt:
14089 * @filename: the filename
14090 *
14091 * Create a parser context for a file content.
14092 * Automatic support for ZLIB/Compress compressed document is provided
14093 * by default if found at compile-time.
14094 *
14095 * Returns the new parser context or NULL
14096 */
14097 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14098 xmlCreateFileParserCtxt(const char *filename)
14099 {
14100 return(xmlCreateURLParserCtxt(filename, 0));
14101 }
14102
14103 #ifdef LIBXML_SAX1_ENABLED
14104 /**
14105 * xmlSAXParseFileWithData:
14106 * @sax: the SAX handler block
14107 * @filename: the filename
14108 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14109 * documents
14110 * @data: the userdata
14111 *
14112 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14113 * compressed document is provided by default if found at compile-time.
14114 * It use the given SAX function block to handle the parsing callback.
14115 * If sax is NULL, fallback to the default DOM tree building routines.
14116 *
14117 * User data (void *) is stored within the parser context in the
14118 * context's _private member, so it is available nearly everywhere in libxml
14119 *
14120 * Returns the resulting document tree
14121 */
14122
14123 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14124 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14125 int recovery, void *data) {
14126 xmlDocPtr ret;
14127 xmlParserCtxtPtr ctxt;
14128
14129 xmlInitParser();
14130
14131 ctxt = xmlCreateFileParserCtxt(filename);
14132 if (ctxt == NULL) {
14133 return(NULL);
14134 }
14135 if (sax != NULL) {
14136 if (ctxt->sax != NULL)
14137 xmlFree(ctxt->sax);
14138 ctxt->sax = sax;
14139 }
14140 xmlDetectSAX2(ctxt);
14141 if (data!=NULL) {
14142 ctxt->_private = data;
14143 }
14144
14145 if (ctxt->directory == NULL)
14146 ctxt->directory = xmlParserGetDirectory(filename);
14147
14148 ctxt->recovery = recovery;
14149
14150 xmlParseDocument(ctxt);
14151
14152 if ((ctxt->wellFormed) || recovery) {
14153 ret = ctxt->myDoc;
14154 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14155 if (ctxt->input->buf->compressed > 0)
14156 ret->compression = 9;
14157 else
14158 ret->compression = ctxt->input->buf->compressed;
14159 }
14160 }
14161 else {
14162 ret = NULL;
14163 xmlFreeDoc(ctxt->myDoc);
14164 ctxt->myDoc = NULL;
14165 }
14166 if (sax != NULL)
14167 ctxt->sax = NULL;
14168 xmlFreeParserCtxt(ctxt);
14169
14170 return(ret);
14171 }
14172
14173 /**
14174 * xmlSAXParseFile:
14175 * @sax: the SAX handler block
14176 * @filename: the filename
14177 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14178 * documents
14179 *
14180 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14181 * compressed document is provided by default if found at compile-time.
14182 * It use the given SAX function block to handle the parsing callback.
14183 * If sax is NULL, fallback to the default DOM tree building routines.
14184 *
14185 * Returns the resulting document tree
14186 */
14187
14188 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14189 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14190 int recovery) {
14191 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14192 }
14193
14194 /**
14195 * xmlRecoverDoc:
14196 * @cur: a pointer to an array of xmlChar
14197 *
14198 * parse an XML in-memory document and build a tree.
14199 * In the case the document is not Well Formed, a attempt to build a
14200 * tree is tried anyway
14201 *
14202 * Returns the resulting document tree or NULL in case of failure
14203 */
14204
14205 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14206 xmlRecoverDoc(const xmlChar *cur) {
14207 return(xmlSAXParseDoc(NULL, cur, 1));
14208 }
14209
14210 /**
14211 * xmlParseFile:
14212 * @filename: the filename
14213 *
14214 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14215 * compressed document is provided by default if found at compile-time.
14216 *
14217 * Returns the resulting document tree if the file was wellformed,
14218 * NULL otherwise.
14219 */
14220
14221 xmlDocPtr
xmlParseFile(const char * filename)14222 xmlParseFile(const char *filename) {
14223 return(xmlSAXParseFile(NULL, filename, 0));
14224 }
14225
14226 /**
14227 * xmlRecoverFile:
14228 * @filename: the filename
14229 *
14230 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14231 * compressed document is provided by default if found at compile-time.
14232 * In the case the document is not Well Formed, it attempts to build
14233 * a tree anyway
14234 *
14235 * Returns the resulting document tree or NULL in case of failure
14236 */
14237
14238 xmlDocPtr
xmlRecoverFile(const char * filename)14239 xmlRecoverFile(const char *filename) {
14240 return(xmlSAXParseFile(NULL, filename, 1));
14241 }
14242
14243
14244 /**
14245 * xmlSetupParserForBuffer:
14246 * @ctxt: an XML parser context
14247 * @buffer: a xmlChar * buffer
14248 * @filename: a file name
14249 *
14250 * Setup the parser context to parse a new buffer; Clears any prior
14251 * contents from the parser context. The buffer parameter must not be
14252 * NULL, but the filename parameter can be
14253 */
14254 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14255 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14256 const char* filename)
14257 {
14258 xmlParserInputPtr input;
14259
14260 if ((ctxt == NULL) || (buffer == NULL))
14261 return;
14262
14263 input = xmlNewInputStream(ctxt);
14264 if (input == NULL) {
14265 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14266 xmlClearParserCtxt(ctxt);
14267 return;
14268 }
14269
14270 xmlClearParserCtxt(ctxt);
14271 if (filename != NULL)
14272 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14273 input->base = buffer;
14274 input->cur = buffer;
14275 input->end = &buffer[xmlStrlen(buffer)];
14276 inputPush(ctxt, input);
14277 }
14278
14279 /**
14280 * xmlSAXUserParseFile:
14281 * @sax: a SAX handler
14282 * @user_data: The user data returned on SAX callbacks
14283 * @filename: a file name
14284 *
14285 * parse an XML file and call the given SAX handler routines.
14286 * Automatic support for ZLIB/Compress compressed document is provided
14287 *
14288 * Returns 0 in case of success or a error number otherwise
14289 */
14290 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14291 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14292 const char *filename) {
14293 int ret = 0;
14294 xmlParserCtxtPtr ctxt;
14295
14296 ctxt = xmlCreateFileParserCtxt(filename);
14297 if (ctxt == NULL) return -1;
14298 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14299 xmlFree(ctxt->sax);
14300 ctxt->sax = sax;
14301 xmlDetectSAX2(ctxt);
14302
14303 if (user_data != NULL)
14304 ctxt->userData = user_data;
14305
14306 xmlParseDocument(ctxt);
14307
14308 if (ctxt->wellFormed)
14309 ret = 0;
14310 else {
14311 if (ctxt->errNo != 0)
14312 ret = ctxt->errNo;
14313 else
14314 ret = -1;
14315 }
14316 if (sax != NULL)
14317 ctxt->sax = NULL;
14318 if (ctxt->myDoc != NULL) {
14319 xmlFreeDoc(ctxt->myDoc);
14320 ctxt->myDoc = NULL;
14321 }
14322 xmlFreeParserCtxt(ctxt);
14323
14324 return ret;
14325 }
14326 #endif /* LIBXML_SAX1_ENABLED */
14327
14328 /************************************************************************
14329 * *
14330 * Front ends when parsing from memory *
14331 * *
14332 ************************************************************************/
14333
14334 /**
14335 * xmlCreateMemoryParserCtxt:
14336 * @buffer: a pointer to a char array
14337 * @size: the size of the array
14338 *
14339 * Create a parser context for an XML in-memory document.
14340 *
14341 * Returns the new parser context or NULL
14342 */
14343 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14344 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14345 xmlParserCtxtPtr ctxt;
14346 xmlParserInputPtr input;
14347 xmlParserInputBufferPtr buf;
14348
14349 if (buffer == NULL)
14350 return(NULL);
14351 if (size <= 0)
14352 return(NULL);
14353
14354 ctxt = xmlNewParserCtxt();
14355 if (ctxt == NULL)
14356 return(NULL);
14357
14358 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14359 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14360 if (buf == NULL) {
14361 xmlFreeParserCtxt(ctxt);
14362 return(NULL);
14363 }
14364
14365 input = xmlNewInputStream(ctxt);
14366 if (input == NULL) {
14367 xmlFreeParserInputBuffer(buf);
14368 xmlFreeParserCtxt(ctxt);
14369 return(NULL);
14370 }
14371
14372 input->filename = NULL;
14373 input->buf = buf;
14374 xmlBufResetInput(input->buf->buffer, input);
14375
14376 inputPush(ctxt, input);
14377 return(ctxt);
14378 }
14379
14380 #ifdef LIBXML_SAX1_ENABLED
14381 /**
14382 * xmlSAXParseMemoryWithData:
14383 * @sax: the SAX handler block
14384 * @buffer: an pointer to a char array
14385 * @size: the size of the array
14386 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14387 * documents
14388 * @data: the userdata
14389 *
14390 * parse an XML in-memory block and use the given SAX function block
14391 * to handle the parsing callback. If sax is NULL, fallback to the default
14392 * DOM tree building routines.
14393 *
14394 * User data (void *) is stored within the parser context in the
14395 * context's _private member, so it is available nearly everywhere in libxml
14396 *
14397 * Returns the resulting document tree
14398 */
14399
14400 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14401 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14402 int size, int recovery, void *data) {
14403 xmlDocPtr ret;
14404 xmlParserCtxtPtr ctxt;
14405
14406 xmlInitParser();
14407
14408 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14409 if (ctxt == NULL) return(NULL);
14410 if (sax != NULL) {
14411 if (ctxt->sax != NULL)
14412 xmlFree(ctxt->sax);
14413 ctxt->sax = sax;
14414 }
14415 xmlDetectSAX2(ctxt);
14416 if (data!=NULL) {
14417 ctxt->_private=data;
14418 }
14419
14420 ctxt->recovery = recovery;
14421
14422 xmlParseDocument(ctxt);
14423
14424 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14425 else {
14426 ret = NULL;
14427 xmlFreeDoc(ctxt->myDoc);
14428 ctxt->myDoc = NULL;
14429 }
14430 if (sax != NULL)
14431 ctxt->sax = NULL;
14432 xmlFreeParserCtxt(ctxt);
14433
14434 return(ret);
14435 }
14436
14437 /**
14438 * xmlSAXParseMemory:
14439 * @sax: the SAX handler block
14440 * @buffer: an pointer to a char array
14441 * @size: the size of the array
14442 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14443 * documents
14444 *
14445 * parse an XML in-memory block and use the given SAX function block
14446 * to handle the parsing callback. If sax is NULL, fallback to the default
14447 * DOM tree building routines.
14448 *
14449 * Returns the resulting document tree
14450 */
14451 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14452 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14453 int size, int recovery) {
14454 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14455 }
14456
14457 /**
14458 * xmlParseMemory:
14459 * @buffer: an pointer to a char array
14460 * @size: the size of the array
14461 *
14462 * parse an XML in-memory block and build a tree.
14463 *
14464 * Returns the resulting document tree
14465 */
14466
xmlParseMemory(const char * buffer,int size)14467 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14468 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14469 }
14470
14471 /**
14472 * xmlRecoverMemory:
14473 * @buffer: an pointer to a char array
14474 * @size: the size of the array
14475 *
14476 * parse an XML in-memory block and build a tree.
14477 * In the case the document is not Well Formed, an attempt to
14478 * build a tree is tried anyway
14479 *
14480 * Returns the resulting document tree or NULL in case of error
14481 */
14482
xmlRecoverMemory(const char * buffer,int size)14483 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14484 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14485 }
14486
14487 /**
14488 * xmlSAXUserParseMemory:
14489 * @sax: a SAX handler
14490 * @user_data: The user data returned on SAX callbacks
14491 * @buffer: an in-memory XML document input
14492 * @size: the length of the XML document in bytes
14493 *
14494 * A better SAX parsing routine.
14495 * parse an XML in-memory buffer and call the given SAX handler routines.
14496 *
14497 * Returns 0 in case of success or a error number otherwise
14498 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14499 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14500 const char *buffer, int size) {
14501 int ret = 0;
14502 xmlParserCtxtPtr ctxt;
14503
14504 xmlInitParser();
14505
14506 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14507 if (ctxt == NULL) return -1;
14508 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14509 xmlFree(ctxt->sax);
14510 ctxt->sax = sax;
14511 xmlDetectSAX2(ctxt);
14512
14513 if (user_data != NULL)
14514 ctxt->userData = user_data;
14515
14516 xmlParseDocument(ctxt);
14517
14518 if (ctxt->wellFormed)
14519 ret = 0;
14520 else {
14521 if (ctxt->errNo != 0)
14522 ret = ctxt->errNo;
14523 else
14524 ret = -1;
14525 }
14526 if (sax != NULL)
14527 ctxt->sax = NULL;
14528 if (ctxt->myDoc != NULL) {
14529 xmlFreeDoc(ctxt->myDoc);
14530 ctxt->myDoc = NULL;
14531 }
14532 xmlFreeParserCtxt(ctxt);
14533
14534 return ret;
14535 }
14536 #endif /* LIBXML_SAX1_ENABLED */
14537
14538 /**
14539 * xmlCreateDocParserCtxt:
14540 * @cur: a pointer to an array of xmlChar
14541 *
14542 * Creates a parser context for an XML in-memory document.
14543 *
14544 * Returns the new parser context or NULL
14545 */
14546 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14547 xmlCreateDocParserCtxt(const xmlChar *cur) {
14548 int len;
14549
14550 if (cur == NULL)
14551 return(NULL);
14552 len = xmlStrlen(cur);
14553 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14554 }
14555
14556 #ifdef LIBXML_SAX1_ENABLED
14557 /**
14558 * xmlSAXParseDoc:
14559 * @sax: the SAX handler block
14560 * @cur: a pointer to an array of xmlChar
14561 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14562 * documents
14563 *
14564 * parse an XML in-memory document and build a tree.
14565 * It use the given SAX function block to handle the parsing callback.
14566 * If sax is NULL, fallback to the default DOM tree building routines.
14567 *
14568 * Returns the resulting document tree
14569 */
14570
14571 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14572 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14573 xmlDocPtr ret;
14574 xmlParserCtxtPtr ctxt;
14575 xmlSAXHandlerPtr oldsax = NULL;
14576
14577 if (cur == NULL) return(NULL);
14578
14579
14580 ctxt = xmlCreateDocParserCtxt(cur);
14581 if (ctxt == NULL) return(NULL);
14582 if (sax != NULL) {
14583 oldsax = ctxt->sax;
14584 ctxt->sax = sax;
14585 ctxt->userData = NULL;
14586 }
14587 xmlDetectSAX2(ctxt);
14588
14589 xmlParseDocument(ctxt);
14590 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14591 else {
14592 ret = NULL;
14593 xmlFreeDoc(ctxt->myDoc);
14594 ctxt->myDoc = NULL;
14595 }
14596 if (sax != NULL)
14597 ctxt->sax = oldsax;
14598 xmlFreeParserCtxt(ctxt);
14599
14600 return(ret);
14601 }
14602
14603 /**
14604 * xmlParseDoc:
14605 * @cur: a pointer to an array of xmlChar
14606 *
14607 * parse an XML in-memory document and build a tree.
14608 *
14609 * Returns the resulting document tree
14610 */
14611
14612 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14613 xmlParseDoc(const xmlChar *cur) {
14614 return(xmlSAXParseDoc(NULL, cur, 0));
14615 }
14616 #endif /* LIBXML_SAX1_ENABLED */
14617
14618 #ifdef LIBXML_LEGACY_ENABLED
14619 /************************************************************************
14620 * *
14621 * Specific function to keep track of entities references *
14622 * and used by the XSLT debugger *
14623 * *
14624 ************************************************************************/
14625
/*
 * Callback installed by xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL means no callback is installed.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14627
14628 /**
14629 * xmlAddEntityReference:
14630 * @ent : A valid entity
14631 * @firstNode : A valid first node for children of entity
14632 * @lastNode : A valid last node of children entity
14633 *
14634 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14635 */
14636 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14637 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14638 xmlNodePtr lastNode)
14639 {
14640 if (xmlEntityRefFunc != NULL) {
14641 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14642 }
14643 }
14644
14645
14646 /**
14647 * xmlSetEntityReferenceFunc:
14648 * @func: A valid function
14649 *
14650 * Set the function to call call back when a xml reference has been made
14651 */
14652 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14653 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14654 {
14655 xmlEntityRefFunc = func;
14656 }
14657 #endif /* LIBXML_LEGACY_ENABLED */
14658
14659 /************************************************************************
14660 * *
14661 * Miscellaneous *
14662 * *
14663 ************************************************************************/
14664
14665 #ifdef LIBXML_XPATH_ENABLED
14666 #include <libxml/xpath.h>
14667 #endif
14668
/*
 * Default generic error handler, declared here directly;
 * xmlInitParser() compares xmlGenericError against it.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser() and back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14671
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again once xmlParserInitialized is set returns immediately.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /*
     * On these Windows builds, register xmlCleanupParser() to run at
     * exit, but only while xmlFree is still the C library free().
     */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Take the global init mutex and test the flag a second time
     * (presumably because another thread may have finished the
     * initialization while this one was waiting for the lock).
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Install the default generic error handler if none or the default is set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library as initialized only after every step has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14720
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The function does nothing when the library is not currently marked
 * as initialized (xmlParserInitialized is 0).
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run (or cleanup already did). */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to initialize the library again. */
    xmlParserInitialized = 0;
}
14767
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Function carrying ATTRIBUTE_DESTRUCTOR: runs xmlCleanupParser(), but
 * only when xmlFree is still the C library free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14781
14782 /************************************************************************
14783 * *
14784 * New set (2.6.0) of simpler and more flexible APIs *
14785 * *
14786 ************************************************************************/
14787
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. The expansion refers to a variable named dict that
 * must be visible where the macro is used. A NULL @str is ignored, and
 * when dict is NULL the string is freed unconditionally.
 *
 * NOTE(review): this expands to a bare if statement that already ends
 * with a semicolon, so it must not be used as the unbraced body of an
 * if that has an else branch.
 */
#define DICT_FREE(str)						\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
14799
14800 /**
14801 * xmlCtxtReset:
14802 * @ctxt: an XML parser context
14803 *
14804 * Reset a parser context
14805 */
14806 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14807 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14808 {
14809 xmlParserInputPtr input;
14810 xmlDictPtr dict;
14811
14812 if (ctxt == NULL)
14813 return;
14814
14815 dict = ctxt->dict;
14816
14817 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14818 xmlFreeInputStream(input);
14819 }
14820 ctxt->inputNr = 0;
14821 ctxt->input = NULL;
14822
14823 ctxt->spaceNr = 0;
14824 if (ctxt->spaceTab != NULL) {
14825 ctxt->spaceTab[0] = -1;
14826 ctxt->space = &ctxt->spaceTab[0];
14827 } else {
14828 ctxt->space = NULL;
14829 }
14830
14831
14832 ctxt->nodeNr = 0;
14833 ctxt->node = NULL;
14834
14835 ctxt->nameNr = 0;
14836 ctxt->name = NULL;
14837
14838 DICT_FREE(ctxt->version);
14839 ctxt->version = NULL;
14840 DICT_FREE(ctxt->encoding);
14841 ctxt->encoding = NULL;
14842 DICT_FREE(ctxt->directory);
14843 ctxt->directory = NULL;
14844 DICT_FREE(ctxt->extSubURI);
14845 ctxt->extSubURI = NULL;
14846 DICT_FREE(ctxt->extSubSystem);
14847 ctxt->extSubSystem = NULL;
14848 if (ctxt->myDoc != NULL)
14849 xmlFreeDoc(ctxt->myDoc);
14850 ctxt->myDoc = NULL;
14851
14852 ctxt->standalone = -1;
14853 ctxt->hasExternalSubset = 0;
14854 ctxt->hasPErefs = 0;
14855 ctxt->html = 0;
14856 ctxt->external = 0;
14857 ctxt->instate = XML_PARSER_START;
14858 ctxt->token = 0;
14859
14860 ctxt->wellFormed = 1;
14861 ctxt->nsWellFormed = 1;
14862 ctxt->disableSAX = 0;
14863 ctxt->valid = 1;
14864 #if 0
14865 ctxt->vctxt.userData = ctxt;
14866 ctxt->vctxt.error = xmlParserValidityError;
14867 ctxt->vctxt.warning = xmlParserValidityWarning;
14868 #endif
14869 ctxt->record_info = 0;
14870 ctxt->checkIndex = 0;
14871 ctxt->inSubset = 0;
14872 ctxt->errNo = XML_ERR_OK;
14873 ctxt->depth = 0;
14874 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14875 ctxt->catalogs = NULL;
14876 ctxt->nbentities = 0;
14877 ctxt->sizeentities = 0;
14878 ctxt->sizeentcopy = 0;
14879 xmlInitNodeInfoSeq(&ctxt->node_seq);
14880
14881 if (ctxt->attsDefault != NULL) {
14882 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14883 ctxt->attsDefault = NULL;
14884 }
14885 if (ctxt->attsSpecial != NULL) {
14886 xmlHashFree(ctxt->attsSpecial, NULL);
14887 ctxt->attsSpecial = NULL;
14888 }
14889
14890 #ifdef LIBXML_CATALOG_ENABLED
14891 if (ctxt->catalogs != NULL)
14892 xmlCatalogFreeLocal(ctxt->catalogs);
14893 #endif
14894 if (ctxt->lastError.code != XML_ERR_OK)
14895 xmlResetError(&ctxt->lastError);
14896 }
14897
14898 /**
14899 * xmlCtxtResetPush:
14900 * @ctxt: an XML parser context
14901 * @chunk: a pointer to an array of chars
14902 * @size: number of chars in the array
14903 * @filename: an optional file name or URI
14904 * @encoding: the document encoding, or NULL
14905 *
14906 * Reset a push parser context
14907 *
14908 * Returns 0 in case of success and 1 in case of error
14909 */
14910 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14911 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14912 int size, const char *filename, const char *encoding)
14913 {
14914 xmlParserInputPtr inputStream;
14915 xmlParserInputBufferPtr buf;
14916 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14917
14918 if (ctxt == NULL)
14919 return(1);
14920
14921 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14922 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14923
14924 buf = xmlAllocParserInputBuffer(enc);
14925 if (buf == NULL)
14926 return(1);
14927
14928 if (ctxt == NULL) {
14929 xmlFreeParserInputBuffer(buf);
14930 return(1);
14931 }
14932
14933 xmlCtxtReset(ctxt);
14934
14935 if (filename == NULL) {
14936 ctxt->directory = NULL;
14937 } else {
14938 ctxt->directory = xmlParserGetDirectory(filename);
14939 }
14940
14941 inputStream = xmlNewInputStream(ctxt);
14942 if (inputStream == NULL) {
14943 xmlFreeParserInputBuffer(buf);
14944 return(1);
14945 }
14946
14947 if (filename == NULL)
14948 inputStream->filename = NULL;
14949 else
14950 inputStream->filename = (char *)
14951 xmlCanonicPath((const xmlChar *) filename);
14952 inputStream->buf = buf;
14953 xmlBufResetInput(buf->buffer, inputStream);
14954
14955 inputPush(ctxt, inputStream);
14956
14957 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14958 (ctxt->input->buf != NULL)) {
14959 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14960 size_t cur = ctxt->input->cur - ctxt->input->base;
14961
14962 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14963
14964 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14965 #ifdef DEBUG_PUSH
14966 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14967 #endif
14968 }
14969
14970 if (encoding != NULL) {
14971 xmlCharEncodingHandlerPtr hdlr;
14972
14973 if (ctxt->encoding != NULL)
14974 xmlFree((xmlChar *) ctxt->encoding);
14975 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14976
14977 hdlr = xmlFindCharEncodingHandler(encoding);
14978 if (hdlr != NULL) {
14979 xmlSwitchToEncoding(ctxt, hdlr);
14980 } else {
14981 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14982 "Unsupported encoding %s\n", BAD_CAST encoding);
14983 }
14984 } else if (enc != XML_CHAR_ENCODING_NONE) {
14985 xmlSwitchEncoding(ctxt, enc);
14986 }
14987
14988 return(0);
14989 }
14990
14991
14992 /**
14993 * xmlCtxtUseOptionsInternal:
14994 * @ctxt: an XML parser context
14995 * @options: a combination of xmlParserOption
14996 * @encoding: the user provided encoding to use
14997 *
14998 * Applies the options to the parser context
14999 *
15000 * Returns 0 in case of success, the set of unknown or unimplemented options
15001 * in case of error.
15002 */
15003 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15004 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15005 {
15006 if (ctxt == NULL)
15007 return(-1);
15008 if (encoding != NULL) {
15009 if (ctxt->encoding != NULL)
15010 xmlFree((xmlChar *) ctxt->encoding);
15011 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15012 }
15013 if (options & XML_PARSE_RECOVER) {
15014 ctxt->recovery = 1;
15015 options -= XML_PARSE_RECOVER;
15016 ctxt->options |= XML_PARSE_RECOVER;
15017 } else
15018 ctxt->recovery = 0;
15019 if (options & XML_PARSE_DTDLOAD) {
15020 ctxt->loadsubset = XML_DETECT_IDS;
15021 options -= XML_PARSE_DTDLOAD;
15022 ctxt->options |= XML_PARSE_DTDLOAD;
15023 } else
15024 ctxt->loadsubset = 0;
15025 if (options & XML_PARSE_DTDATTR) {
15026 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15027 options -= XML_PARSE_DTDATTR;
15028 ctxt->options |= XML_PARSE_DTDATTR;
15029 }
15030 if (options & XML_PARSE_NOENT) {
15031 ctxt->replaceEntities = 1;
15032 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15033 options -= XML_PARSE_NOENT;
15034 ctxt->options |= XML_PARSE_NOENT;
15035 } else
15036 ctxt->replaceEntities = 0;
15037 if (options & XML_PARSE_PEDANTIC) {
15038 ctxt->pedantic = 1;
15039 options -= XML_PARSE_PEDANTIC;
15040 ctxt->options |= XML_PARSE_PEDANTIC;
15041 } else
15042 ctxt->pedantic = 0;
15043 if (options & XML_PARSE_NOBLANKS) {
15044 ctxt->keepBlanks = 0;
15045 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15046 options -= XML_PARSE_NOBLANKS;
15047 ctxt->options |= XML_PARSE_NOBLANKS;
15048 } else
15049 ctxt->keepBlanks = 1;
15050 if (options & XML_PARSE_DTDVALID) {
15051 ctxt->validate = 1;
15052 if (options & XML_PARSE_NOWARNING)
15053 ctxt->vctxt.warning = NULL;
15054 if (options & XML_PARSE_NOERROR)
15055 ctxt->vctxt.error = NULL;
15056 options -= XML_PARSE_DTDVALID;
15057 ctxt->options |= XML_PARSE_DTDVALID;
15058 } else
15059 ctxt->validate = 0;
15060 if (options & XML_PARSE_NOWARNING) {
15061 ctxt->sax->warning = NULL;
15062 options -= XML_PARSE_NOWARNING;
15063 }
15064 if (options & XML_PARSE_NOERROR) {
15065 ctxt->sax->error = NULL;
15066 ctxt->sax->fatalError = NULL;
15067 options -= XML_PARSE_NOERROR;
15068 }
15069 #ifdef LIBXML_SAX1_ENABLED
15070 if (options & XML_PARSE_SAX1) {
15071 ctxt->sax->startElement = xmlSAX2StartElement;
15072 ctxt->sax->endElement = xmlSAX2EndElement;
15073 ctxt->sax->startElementNs = NULL;
15074 ctxt->sax->endElementNs = NULL;
15075 ctxt->sax->initialized = 1;
15076 options -= XML_PARSE_SAX1;
15077 ctxt->options |= XML_PARSE_SAX1;
15078 }
15079 #endif /* LIBXML_SAX1_ENABLED */
15080 if (options & XML_PARSE_NODICT) {
15081 ctxt->dictNames = 0;
15082 options -= XML_PARSE_NODICT;
15083 ctxt->options |= XML_PARSE_NODICT;
15084 } else {
15085 ctxt->dictNames = 1;
15086 }
15087 if (options & XML_PARSE_NOCDATA) {
15088 ctxt->sax->cdataBlock = NULL;
15089 options -= XML_PARSE_NOCDATA;
15090 ctxt->options |= XML_PARSE_NOCDATA;
15091 }
15092 if (options & XML_PARSE_NSCLEAN) {
15093 ctxt->options |= XML_PARSE_NSCLEAN;
15094 options -= XML_PARSE_NSCLEAN;
15095 }
15096 if (options & XML_PARSE_NONET) {
15097 ctxt->options |= XML_PARSE_NONET;
15098 options -= XML_PARSE_NONET;
15099 }
15100 if (options & XML_PARSE_COMPACT) {
15101 ctxt->options |= XML_PARSE_COMPACT;
15102 options -= XML_PARSE_COMPACT;
15103 }
15104 if (options & XML_PARSE_OLD10) {
15105 ctxt->options |= XML_PARSE_OLD10;
15106 options -= XML_PARSE_OLD10;
15107 }
15108 if (options & XML_PARSE_NOBASEFIX) {
15109 ctxt->options |= XML_PARSE_NOBASEFIX;
15110 options -= XML_PARSE_NOBASEFIX;
15111 }
15112 if (options & XML_PARSE_HUGE) {
15113 ctxt->options |= XML_PARSE_HUGE;
15114 options -= XML_PARSE_HUGE;
15115 if (ctxt->dict != NULL)
15116 xmlDictSetLimit(ctxt->dict, 0);
15117 }
15118 if (options & XML_PARSE_OLDSAX) {
15119 ctxt->options |= XML_PARSE_OLDSAX;
15120 options -= XML_PARSE_OLDSAX;
15121 }
15122 if (options & XML_PARSE_IGNORE_ENC) {
15123 ctxt->options |= XML_PARSE_IGNORE_ENC;
15124 options -= XML_PARSE_IGNORE_ENC;
15125 }
15126 if (options & XML_PARSE_BIG_LINES) {
15127 ctxt->options |= XML_PARSE_BIG_LINES;
15128 options -= XML_PARSE_BIG_LINES;
15129 }
15130 ctxt->linenumbers = 1;
15131 return (options);
15132 }
15133
15134 /**
15135 * xmlCtxtUseOptions:
15136 * @ctxt: an XML parser context
15137 * @options: a combination of xmlParserOption
15138 *
15139 * Applies the options to the parser context
15140 *
15141 * Returns 0 in case of success, the set of unknown or unimplemented options
15142 * in case of error.
15143 */
15144 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15145 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15146 {
15147 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15148 }
15149
15150 /**
15151 * xmlDoRead:
15152 * @ctxt: an XML parser context
15153 * @URL: the base URL to use for the document
15154 * @encoding: the document encoding, or NULL
15155 * @options: a combination of xmlParserOption
15156 * @reuse: keep the context for reuse
15157 *
15158 * Common front-end for the xmlRead functions
15159 *
15160 * Returns the resulting document tree or NULL
15161 */
15162 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15163 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15164 int options, int reuse)
15165 {
15166 xmlDocPtr ret;
15167
15168 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15169 if (encoding != NULL) {
15170 xmlCharEncodingHandlerPtr hdlr;
15171
15172 hdlr = xmlFindCharEncodingHandler(encoding);
15173 if (hdlr != NULL)
15174 xmlSwitchToEncoding(ctxt, hdlr);
15175 }
15176 if ((URL != NULL) && (ctxt->input != NULL) &&
15177 (ctxt->input->filename == NULL))
15178 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15179 xmlParseDocument(ctxt);
15180 if ((ctxt->wellFormed) || ctxt->recovery)
15181 ret = ctxt->myDoc;
15182 else {
15183 ret = NULL;
15184 if (ctxt->myDoc != NULL) {
15185 xmlFreeDoc(ctxt->myDoc);
15186 }
15187 }
15188 ctxt->myDoc = NULL;
15189 if (!reuse) {
15190 xmlFreeParserCtxt(ctxt);
15191 }
15192
15193 return (ret);
15194 }
15195
15196 /**
15197 * xmlReadDoc:
15198 * @cur: a pointer to a zero terminated string
15199 * @URL: the base URL to use for the document
15200 * @encoding: the document encoding, or NULL
15201 * @options: a combination of xmlParserOption
15202 *
15203 * parse an XML in-memory document and build a tree.
15204 *
15205 * Returns the resulting document tree
15206 */
15207 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15208 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15209 {
15210 xmlParserCtxtPtr ctxt;
15211
15212 if (cur == NULL)
15213 return (NULL);
15214 xmlInitParser();
15215
15216 ctxt = xmlCreateDocParserCtxt(cur);
15217 if (ctxt == NULL)
15218 return (NULL);
15219 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15220 }
15221
15222 /**
15223 * xmlReadFile:
15224 * @filename: a file or URL
15225 * @encoding: the document encoding, or NULL
15226 * @options: a combination of xmlParserOption
15227 *
15228 * parse an XML file from the filesystem or the network.
15229 *
15230 * Returns the resulting document tree
15231 */
15232 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15233 xmlReadFile(const char *filename, const char *encoding, int options)
15234 {
15235 xmlParserCtxtPtr ctxt;
15236
15237 xmlInitParser();
15238 ctxt = xmlCreateURLParserCtxt(filename, options);
15239 if (ctxt == NULL)
15240 return (NULL);
15241 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15242 }
15243
15244 /**
15245 * xmlReadMemory:
15246 * @buffer: a pointer to a char array
15247 * @size: the size of the array
15248 * @URL: the base URL to use for the document
15249 * @encoding: the document encoding, or NULL
15250 * @options: a combination of xmlParserOption
15251 *
15252 * parse an XML in-memory document and build a tree.
15253 *
15254 * Returns the resulting document tree
15255 */
15256 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15257 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15258 {
15259 xmlParserCtxtPtr ctxt;
15260
15261 xmlInitParser();
15262 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15263 if (ctxt == NULL)
15264 return (NULL);
15265 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15266 }
15267
15268 /**
15269 * xmlReadFd:
15270 * @fd: an open file descriptor
15271 * @URL: the base URL to use for the document
15272 * @encoding: the document encoding, or NULL
15273 * @options: a combination of xmlParserOption
15274 *
15275 * parse an XML from a file descriptor and build a tree.
15276 * NOTE that the file descriptor will not be closed when the
15277 * reader is closed or reset.
15278 *
15279 * Returns the resulting document tree
15280 */
15281 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15282 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15283 {
15284 xmlParserCtxtPtr ctxt;
15285 xmlParserInputBufferPtr input;
15286 xmlParserInputPtr stream;
15287
15288 if (fd < 0)
15289 return (NULL);
15290 xmlInitParser();
15291
15292 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15293 if (input == NULL)
15294 return (NULL);
15295 input->closecallback = NULL;
15296 ctxt = xmlNewParserCtxt();
15297 if (ctxt == NULL) {
15298 xmlFreeParserInputBuffer(input);
15299 return (NULL);
15300 }
15301 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15302 if (stream == NULL) {
15303 xmlFreeParserInputBuffer(input);
15304 xmlFreeParserCtxt(ctxt);
15305 return (NULL);
15306 }
15307 inputPush(ctxt, stream);
15308 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15309 }
15310
15311 /**
15312 * xmlReadIO:
15313 * @ioread: an I/O read function
15314 * @ioclose: an I/O close function
15315 * @ioctx: an I/O handler
15316 * @URL: the base URL to use for the document
15317 * @encoding: the document encoding, or NULL
15318 * @options: a combination of xmlParserOption
15319 *
15320 * parse an XML document from I/O functions and source and build a tree.
15321 *
15322 * Returns the resulting document tree
15323 */
15324 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15325 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15326 void *ioctx, const char *URL, const char *encoding, int options)
15327 {
15328 xmlParserCtxtPtr ctxt;
15329 xmlParserInputBufferPtr input;
15330 xmlParserInputPtr stream;
15331
15332 if (ioread == NULL)
15333 return (NULL);
15334 xmlInitParser();
15335
15336 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15337 XML_CHAR_ENCODING_NONE);
15338 if (input == NULL) {
15339 if (ioclose != NULL)
15340 ioclose(ioctx);
15341 return (NULL);
15342 }
15343 ctxt = xmlNewParserCtxt();
15344 if (ctxt == NULL) {
15345 xmlFreeParserInputBuffer(input);
15346 return (NULL);
15347 }
15348 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15349 if (stream == NULL) {
15350 xmlFreeParserInputBuffer(input);
15351 xmlFreeParserCtxt(ctxt);
15352 return (NULL);
15353 }
15354 inputPush(ctxt, stream);
15355 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15356 }
15357
15358 /**
15359 * xmlCtxtReadDoc:
15360 * @ctxt: an XML parser context
15361 * @cur: a pointer to a zero terminated string
15362 * @URL: the base URL to use for the document
15363 * @encoding: the document encoding, or NULL
15364 * @options: a combination of xmlParserOption
15365 *
15366 * parse an XML in-memory document and build a tree.
15367 * This reuses the existing @ctxt parser context
15368 *
15369 * Returns the resulting document tree
15370 */
15371 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15372 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15373 const char *URL, const char *encoding, int options)
15374 {
15375 xmlParserInputPtr stream;
15376
15377 if (cur == NULL)
15378 return (NULL);
15379 if (ctxt == NULL)
15380 return (NULL);
15381 xmlInitParser();
15382
15383 xmlCtxtReset(ctxt);
15384
15385 stream = xmlNewStringInputStream(ctxt, cur);
15386 if (stream == NULL) {
15387 return (NULL);
15388 }
15389 inputPush(ctxt, stream);
15390 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15391 }
15392
15393 /**
15394 * xmlCtxtReadFile:
15395 * @ctxt: an XML parser context
15396 * @filename: a file or URL
15397 * @encoding: the document encoding, or NULL
15398 * @options: a combination of xmlParserOption
15399 *
15400 * parse an XML file from the filesystem or the network.
15401 * This reuses the existing @ctxt parser context
15402 *
15403 * Returns the resulting document tree
15404 */
15405 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15406 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15407 const char *encoding, int options)
15408 {
15409 xmlParserInputPtr stream;
15410
15411 if (filename == NULL)
15412 return (NULL);
15413 if (ctxt == NULL)
15414 return (NULL);
15415 xmlInitParser();
15416
15417 xmlCtxtReset(ctxt);
15418
15419 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15420 if (stream == NULL) {
15421 return (NULL);
15422 }
15423 inputPush(ctxt, stream);
15424 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15425 }
15426
15427 /**
15428 * xmlCtxtReadMemory:
15429 * @ctxt: an XML parser context
15430 * @buffer: a pointer to a char array
15431 * @size: the size of the array
15432 * @URL: the base URL to use for the document
15433 * @encoding: the document encoding, or NULL
15434 * @options: a combination of xmlParserOption
15435 *
15436 * parse an XML in-memory document and build a tree.
15437 * This reuses the existing @ctxt parser context
15438 *
15439 * Returns the resulting document tree
15440 */
15441 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15442 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15443 const char *URL, const char *encoding, int options)
15444 {
15445 xmlParserInputBufferPtr input;
15446 xmlParserInputPtr stream;
15447
15448 if (ctxt == NULL)
15449 return (NULL);
15450 if (buffer == NULL)
15451 return (NULL);
15452 xmlInitParser();
15453
15454 xmlCtxtReset(ctxt);
15455
15456 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15457 if (input == NULL) {
15458 return(NULL);
15459 }
15460
15461 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15462 if (stream == NULL) {
15463 xmlFreeParserInputBuffer(input);
15464 return(NULL);
15465 }
15466
15467 inputPush(ctxt, stream);
15468 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15469 }
15470
15471 /**
15472 * xmlCtxtReadFd:
15473 * @ctxt: an XML parser context
15474 * @fd: an open file descriptor
15475 * @URL: the base URL to use for the document
15476 * @encoding: the document encoding, or NULL
15477 * @options: a combination of xmlParserOption
15478 *
15479 * parse an XML from a file descriptor and build a tree.
15480 * This reuses the existing @ctxt parser context
15481 * NOTE that the file descriptor will not be closed when the
15482 * reader is closed or reset.
15483 *
15484 * Returns the resulting document tree
15485 */
15486 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15487 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15488 const char *URL, const char *encoding, int options)
15489 {
15490 xmlParserInputBufferPtr input;
15491 xmlParserInputPtr stream;
15492
15493 if (fd < 0)
15494 return (NULL);
15495 if (ctxt == NULL)
15496 return (NULL);
15497 xmlInitParser();
15498
15499 xmlCtxtReset(ctxt);
15500
15501
15502 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15503 if (input == NULL)
15504 return (NULL);
15505 input->closecallback = NULL;
15506 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15507 if (stream == NULL) {
15508 xmlFreeParserInputBuffer(input);
15509 return (NULL);
15510 }
15511 inputPush(ctxt, stream);
15512 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15513 }
15514
15515 /**
15516 * xmlCtxtReadIO:
15517 * @ctxt: an XML parser context
15518 * @ioread: an I/O read function
15519 * @ioclose: an I/O close function
15520 * @ioctx: an I/O handler
15521 * @URL: the base URL to use for the document
15522 * @encoding: the document encoding, or NULL
15523 * @options: a combination of xmlParserOption
15524 *
15525 * parse an XML document from I/O functions and source and build a tree.
15526 * This reuses the existing @ctxt parser context
15527 *
15528 * Returns the resulting document tree
15529 */
15530 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15531 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15532 xmlInputCloseCallback ioclose, void *ioctx,
15533 const char *URL,
15534 const char *encoding, int options)
15535 {
15536 xmlParserInputBufferPtr input;
15537 xmlParserInputPtr stream;
15538
15539 if (ioread == NULL)
15540 return (NULL);
15541 if (ctxt == NULL)
15542 return (NULL);
15543 xmlInitParser();
15544
15545 xmlCtxtReset(ctxt);
15546
15547 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548 XML_CHAR_ENCODING_NONE);
15549 if (input == NULL) {
15550 if (ioclose != NULL)
15551 ioclose(ioctx);
15552 return (NULL);
15553 }
15554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555 if (stream == NULL) {
15556 xmlFreeParserInputBuffer(input);
15557 return (NULL);
15558 }
15559 inputPush(ctxt, stream);
15560 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15561 }
15562
15563 #define bottom_parser
15564 #include "elfgcchack.h"
15565