1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32) && !defined (__CYGWIN__)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <libxml/xmlmemory.h>
53 #include <libxml/threads.h>
54 #include <libxml/globals.h>
55 #include <libxml/tree.h>
56 #include <libxml/parser.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #ifdef LIBXML_CATALOG_ENABLED
65 #include <libxml/catalog.h>
66 #endif
67 #ifdef LIBXML_SCHEMAS_ENABLED
68 #include <libxml/xmlschemastypes.h>
69 #include <libxml/relaxng.h>
70 #endif
71 #ifdef HAVE_CTYPE_H
72 #include <ctype.h>
73 #endif
74 #ifdef HAVE_STDLIB_H
75 #include <stdlib.h>
76 #endif
77 #ifdef HAVE_SYS_STAT_H
78 #include <sys/stat.h>
79 #endif
80 #ifdef HAVE_FCNTL_H
81 #include <fcntl.h>
82 #endif
83 #ifdef HAVE_UNISTD_H
84 #include <unistd.h>
85 #endif
86
87 #include "buf.h"
88 #include "enc.h"
89
90 static void
91 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93 static xmlParserCtxtPtr
94 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99 /************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
/*
 * Entity related thresholds; XML_PARSER_BIG_ENTITY is the minimal size
 * from which xmlParserEntityCheck() starts looking at an entity.
 */
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
115
116 /*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,size_t size,xmlEntityPtr ent,size_t replacement)126 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128 {
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
148 ++ctxt->depth;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
151 --ctxt->depth;
152 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
153 ent->content[0] = 0;
154 }
155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
204 size = ent->checked / 2;
205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
223 * strange we got no data for checking
224 */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
229 }
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232 }
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;



#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expected to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100

/*
 * List of XML prefixed PI allowed by W3C specs (NULL terminated)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
272
273 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
276
277 static xmlParserErrors
278 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
280 void *user_data, int depth, const xmlChar *URL,
281 const xmlChar *ID, xmlNodePtr *list);
282
283 static int
284 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
286 #ifdef LIBXML_LEGACY_ENABLED
287 static void
288 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
290 #endif /* LIBXML_LEGACY_ENABLED */
291
292 static xmlParserErrors
293 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
295
296 static int
297 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
299 /************************************************************************
300 * *
301 * Some factorized error routines *
302 * *
303 ************************************************************************/
304
305 /**
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)314 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316 {
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322
323 if (prefix == NULL)
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
328 else
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
339 }
340
341 /**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)350 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351 {
352 const char *errmsg;
353
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
360 break;
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
363 break;
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
366 break;
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
369 break;
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
372 break;
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
375 break;
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
378 break;
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
381 break;
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
384 break;
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
387 break;
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
390 break;
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
393 break;
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
399 break;
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
402 break;
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
405 break;
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
408 break;
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
411 break;
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
414 break;
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
417 break;
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
420 break;
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
423 break;
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
426 break;
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
429 break;
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
432 break;
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
435 break;
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
438 break;
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
441 break;
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
444 break;
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
447 break;
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
450 break;
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
453 break;
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
456 break;
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
459 break;
460 case XML_ERR_PEREF_IN_INT_SUBSET:
461 errmsg =
462 "PEReference: forbidden within markup decl in internal subset";
463 break;
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
466 break;
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
469 break;
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
475 "conditional section INCLUDE or IGNORE keyword expected";
476 break;
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
479 break;
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
482 break;
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
485 break;
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
488 break;
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
491 break;
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
494 break;
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
497 break;
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
500 break;
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
503 break;
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
506 break;
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
509 break;
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
512 break;
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
515 break;
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
518 break;
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
521 break;
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
524 break;
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
527 break;
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
530 break;
531 #if 0
532 case:
533 errmsg = "";
534 break;
535 #endif
536 default:
537 errmsg = "Unregistered error message";
538 }
539 if (ctxt != NULL)
540 ctxt->errNo = error;
541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555 }
556
557 /**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)566 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
568 {
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
572 if (ctxt != NULL)
573 ctxt->errNo = error;
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
581 }
582
583 /**
584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
593 static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)594 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596 {
597 xmlStructuredErrorFunc schannel = NULL;
598
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
620 }
621
622 /**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
629 * Handle a validity error.
630 */
631 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)632 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
634 {
635 xmlStructuredErrorFunc schannel = NULL;
636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 ctxt->valid = 0;
653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
659 }
660 }
661
662 /**
663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
671 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)672 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
674 {
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
678 if (ctxt != NULL)
679 ctxt->errNo = error;
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
688 }
689
690 /**
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
701 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)702 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
704 const xmlChar *str2)
705 {
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
709 if (ctxt != NULL)
710 ctxt->errNo = error;
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
720 }
721
722 /**
723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
731 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)732 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
734 {
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749 }
750
751 /**
752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
760 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)761 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763 {
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773 }
774
775 /**
776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
785 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)786 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
790 {
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
794 if (ctxt != NULL)
795 ctxt->errNo = error;
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
802 }
803
804 /**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a namespace warning error
813 */
814 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)815 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819 {
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827 }
828
829 /************************************************************************
830 * *
831 * Library wide options *
832 * *
833 ************************************************************************/
834
835 /**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845 int
xmlHasFeature(xmlFeature feature)846 xmlHasFeature(xmlFeature feature)
847 {
848 switch (feature) {
849 case XML_WITH_THREAD:
850 #ifdef LIBXML_THREAD_ENABLED
851 return(1);
852 #else
853 return(0);
854 #endif
855 case XML_WITH_TREE:
856 #ifdef LIBXML_TREE_ENABLED
857 return(1);
858 #else
859 return(0);
860 #endif
861 case XML_WITH_OUTPUT:
862 #ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864 #else
865 return(0);
866 #endif
867 case XML_WITH_PUSH:
868 #ifdef LIBXML_PUSH_ENABLED
869 return(1);
870 #else
871 return(0);
872 #endif
873 case XML_WITH_READER:
874 #ifdef LIBXML_READER_ENABLED
875 return(1);
876 #else
877 return(0);
878 #endif
879 case XML_WITH_PATTERN:
880 #ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882 #else
883 return(0);
884 #endif
885 case XML_WITH_WRITER:
886 #ifdef LIBXML_WRITER_ENABLED
887 return(1);
888 #else
889 return(0);
890 #endif
891 case XML_WITH_SAX1:
892 #ifdef LIBXML_SAX1_ENABLED
893 return(1);
894 #else
895 return(0);
896 #endif
897 case XML_WITH_FTP:
898 #ifdef LIBXML_FTP_ENABLED
899 return(1);
900 #else
901 return(0);
902 #endif
903 case XML_WITH_HTTP:
904 #ifdef LIBXML_HTTP_ENABLED
905 return(1);
906 #else
907 return(0);
908 #endif
909 case XML_WITH_VALID:
910 #ifdef LIBXML_VALID_ENABLED
911 return(1);
912 #else
913 return(0);
914 #endif
915 case XML_WITH_HTML:
916 #ifdef LIBXML_HTML_ENABLED
917 return(1);
918 #else
919 return(0);
920 #endif
921 case XML_WITH_LEGACY:
922 #ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924 #else
925 return(0);
926 #endif
927 case XML_WITH_C14N:
928 #ifdef LIBXML_C14N_ENABLED
929 return(1);
930 #else
931 return(0);
932 #endif
933 case XML_WITH_CATALOG:
934 #ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936 #else
937 return(0);
938 #endif
939 case XML_WITH_XPATH:
940 #ifdef LIBXML_XPATH_ENABLED
941 return(1);
942 #else
943 return(0);
944 #endif
945 case XML_WITH_XPTR:
946 #ifdef LIBXML_XPTR_ENABLED
947 return(1);
948 #else
949 return(0);
950 #endif
951 case XML_WITH_XINCLUDE:
952 #ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954 #else
955 return(0);
956 #endif
957 case XML_WITH_ICONV:
958 #ifdef LIBXML_ICONV_ENABLED
959 return(1);
960 #else
961 return(0);
962 #endif
963 case XML_WITH_ISO8859X:
964 #ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966 #else
967 return(0);
968 #endif
969 case XML_WITH_UNICODE:
970 #ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972 #else
973 return(0);
974 #endif
975 case XML_WITH_REGEXP:
976 #ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978 #else
979 return(0);
980 #endif
981 case XML_WITH_AUTOMATA:
982 #ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984 #else
985 return(0);
986 #endif
987 case XML_WITH_EXPR:
988 #ifdef LIBXML_EXPR_ENABLED
989 return(1);
990 #else
991 return(0);
992 #endif
993 case XML_WITH_SCHEMAS:
994 #ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996 #else
997 return(0);
998 #endif
999 case XML_WITH_SCHEMATRON:
1000 #ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002 #else
1003 return(0);
1004 #endif
1005 case XML_WITH_MODULES:
1006 #ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008 #else
1009 return(0);
1010 #endif
1011 case XML_WITH_DEBUG:
1012 #ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014 #else
1015 return(0);
1016 #endif
1017 case XML_WITH_DEBUG_MEM:
1018 #ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020 #else
1021 return(0);
1022 #endif
1023 case XML_WITH_DEBUG_RUN:
1024 #ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026 #else
1027 return(0);
1028 #endif
1029 case XML_WITH_ZLIB:
1030 #ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032 #else
1033 return(0);
1034 #endif
1035 case XML_WITH_LZMA:
1036 #ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038 #else
1039 return(0);
1040 #endif
1041 case XML_WITH_ICU:
1042 #ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044 #else
1045 return(0);
1046 #endif
1047 default:
1048 break;
1049 }
1050 return(0);
1051 }
1052
1053 /************************************************************************
1054 * *
1055 * SAX2 defaulted attributes handling *
1056 * *
1057 ************************************************************************/
1058
1059 /**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)1066 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068 #ifdef LIBXML_SAX1_ENABLED
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072 #else
1073 ctxt->sax2 = 1;
1074 #endif /* LIBXML_SAX1_ENABLED */
1075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
1082 }
1083 }
1084
/*
 * Set of defaulted attributes attached to one element: a small header
 * followed by an array of string pointers.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;	/* the size of the array */
    /*
     * __STDC_VERSION__ is not defined by pre-C99 (and C++) compilers,
     * so test for it before comparing to stay clean under -Wundef.
     */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
    /* Using a C99 flexible array member avoids UBSan errors. */
    const xmlChar *values[]; /* array of localname/prefix/values/external */
#else
    const xmlChar *values[5];
#endif
};
1097
1098 /**
1099 * xmlAttrNormalizeSpace:
1100 * @src: the source string
1101 * @dst: the target string
1102 *
1103 * Normalize the space in non CDATA attribute values:
1104 * If the attribute type is not CDATA, then the XML processor MUST further
1105 * process the normalized attribute value by discarding any leading and
1106 * trailing space (#x20) characters, and by replacing sequences of space
1107 * (#x20) characters by a single space (#x20) character.
1108 * Note that the size of dst need to be at least src, and if one doesn't need
1109 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110 * passing src as dst is just fine.
1111 *
1112 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113 * is needed.
1114 */
1115 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1116 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117 {
1118 if ((src == NULL) || (dst == NULL))
1119 return(NULL);
1120
1121 while (*src == 0x20) src++;
1122 while (*src != 0) {
1123 if (*src == 0x20) {
1124 while (*src == 0x20) src++;
1125 if (*src != 0)
1126 *dst++ = 0x20;
1127 } else {
1128 *dst++ = *src++;
1129 }
1130 }
1131 *dst = 0;
1132 if (dst == src)
1133 return(NULL);
1134 return(dst);
1135 }
1136
1137 /**
1138 * xmlAttrNormalizeSpace2:
1139 * @src: the source string
1140 *
1141 * Normalize the space in non CDATA attribute values, a slightly more complex
1142 * front end to avoid allocation problems when running on attribute values
1143 * coming from the input.
1144 *
1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146 * is needed.
1147 */
1148 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1149 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1150 {
1151 int i;
1152 int remove_head = 0;
1153 int need_realloc = 0;
1154 const xmlChar *cur;
1155
1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157 return(NULL);
1158 i = *len;
1159 if (i <= 0)
1160 return(NULL);
1161
1162 cur = src;
1163 while (*cur == 0x20) {
1164 cur++;
1165 remove_head++;
1166 }
1167 while (*cur != 0) {
1168 if (*cur == 0x20) {
1169 cur++;
1170 if ((*cur == 0x20) || (*cur == 0)) {
1171 need_realloc = 1;
1172 break;
1173 }
1174 } else
1175 cur++;
1176 }
1177 if (need_realloc) {
1178 xmlChar *ret;
1179
1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 if (ret == NULL) {
1182 xmlErrMemory(ctxt, NULL);
1183 return(NULL);
1184 }
1185 xmlAttrNormalizeSpace(ret, ret);
1186 *len = (int) strlen((const char *)ret);
1187 return(ret);
1188 } else if (remove_head) {
1189 *len -= remove_head;
1190 memmove(src, src + remove_head, 1 + *len);
1191 return(src);
1192 }
1193 return(NULL);
1194 }
1195
1196 /**
1197 * xmlAddDefAttrs:
1198 * @ctxt: an XML parser context
1199 * @fullname: the element fullname
1200 * @fullattr: the attribute fullname
1201 * @value: the attribute value
1202 *
1203 * Add a defaulted attribute for an element
1204 */
1205 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1206 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207 const xmlChar *fullname,
1208 const xmlChar *fullattr,
1209 const xmlChar *value) {
1210 xmlDefAttrsPtr defaults;
1211 int len;
1212 const xmlChar *name;
1213 const xmlChar *prefix;
1214
1215 /*
1216 * Allows to detect attribute redefinitions
1217 */
1218 if (ctxt->attsSpecial != NULL) {
1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 return;
1221 }
1222
1223 if (ctxt->attsDefault == NULL) {
1224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1225 if (ctxt->attsDefault == NULL)
1226 goto mem_error;
1227 }
1228
1229 /*
1230 * split the element name into prefix:localname , the string found
1231 * are within the DTD and then not associated to namespace names.
1232 */
1233 name = xmlSplitQName3(fullname, &len);
1234 if (name == NULL) {
1235 name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 prefix = NULL;
1237 } else {
1238 name = xmlDictLookup(ctxt->dict, name, -1);
1239 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240 }
1241
1242 /*
1243 * make sure there is some storage
1244 */
1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246 if (defaults == NULL) {
1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1248 (4 * 5) * sizeof(const xmlChar *));
1249 if (defaults == NULL)
1250 goto mem_error;
1251 defaults->nbAttrs = 0;
1252 defaults->maxAttrs = 4;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1259 xmlDefAttrsPtr temp;
1260
1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1263 if (temp == NULL)
1264 goto mem_error;
1265 defaults = temp;
1266 defaults->maxAttrs *= 2;
1267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 defaults, NULL) < 0) {
1269 xmlFree(defaults);
1270 goto mem_error;
1271 }
1272 }
1273
1274 /*
1275 * Split the element name into prefix:localname , the string found
1276 * are within the DTD and hen not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullattr, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285 }
1286
1287 defaults->values[5 * defaults->nbAttrs] = name;
1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1289 /* intern the string and precompute the end */
1290 len = xmlStrlen(value);
1291 value = xmlDictLookup(ctxt->dict, value, len);
1292 defaults->values[5 * defaults->nbAttrs + 2] = value;
1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294 if (ctxt->external)
1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296 else
1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1298 defaults->nbAttrs++;
1299
1300 return;
1301
1302 mem_error:
1303 xmlErrMemory(ctxt, NULL);
1304 return;
1305 }
1306
1307 /**
1308 * xmlAddSpecialAttr:
1309 * @ctxt: an XML parser context
1310 * @fullname: the element fullname
1311 * @fullattr: the attribute fullname
1312 * @type: the attribute type
1313 *
1314 * Register this attribute type
1315 */
1316 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1317 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 const xmlChar *fullname,
1319 const xmlChar *fullattr,
1320 int type)
1321 {
1322 if (ctxt->attsSpecial == NULL) {
1323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1324 if (ctxt->attsSpecial == NULL)
1325 goto mem_error;
1326 }
1327
1328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329 return;
1330
1331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1332 (void *) (ptrdiff_t) type);
1333 return;
1334
1335 mem_error:
1336 xmlErrMemory(ctxt, NULL);
1337 return;
1338 }
1339
1340 /**
1341 * xmlCleanSpecialAttrCallback:
1342 *
1343 * Removes CDATA attributes from the special attribute table
1344 */
1345 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1346 xmlCleanSpecialAttrCallback(void *payload, void *data,
1347 const xmlChar *fullname, const xmlChar *fullattr,
1348 const xmlChar *unused ATTRIBUTE_UNUSED) {
1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350
1351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 }
1354 }
1355
1356 /**
1357 * xmlCleanSpecialAttr:
1358 * @ctxt: an XML parser context
1359 *
1360 * Trim the list of attributes defined to remove all those of type
1361 * CDATA as they are not special. This call should be done when finishing
1362 * to parse the DTD and before starting to parse the document root.
1363 */
1364 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1365 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366 {
1367 if (ctxt->attsSpecial == NULL)
1368 return;
1369
1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371
1372 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373 xmlHashFree(ctxt->attsSpecial, NULL);
1374 ctxt->attsSpecial = NULL;
1375 }
1376 return;
1377 }
1378
1379 /**
1380 * xmlCheckLanguageID:
1381 * @lang: pointer to the string value
1382 *
1383 * Checks that the value conforms to the LanguageID production:
1384 *
1385 * NOTE: this is somewhat deprecated, those productions were removed from
1386 * the XML Second edition.
1387 *
1388 * [33] LanguageID ::= Langcode ('-' Subcode)*
1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393 * [38] Subcode ::= ([a-z] | [A-Z])+
1394 *
1395 * The current REC reference the sucessors of RFC 1766, currently 5646
1396 *
1397 * http://www.rfc-editor.org/rfc/rfc5646.txt
1398 * langtag = language
1399 * ["-" script]
1400 * ["-" region]
1401 * *("-" variant)
1402 * *("-" extension)
1403 * ["-" privateuse]
1404 * language = 2*3ALPHA ; shortest ISO 639 code
1405 * ["-" extlang] ; sometimes followed by
1406 * ; extended language subtags
1407 * / 4ALPHA ; or reserved for future use
1408 * / 5*8ALPHA ; or registered language subtag
1409 *
1410 * extlang = 3ALPHA ; selected ISO 639 codes
1411 * *2("-" 3ALPHA) ; permanently reserved
1412 *
1413 * script = 4ALPHA ; ISO 15924 code
1414 *
1415 * region = 2ALPHA ; ISO 3166-1 code
1416 * / 3DIGIT ; UN M.49 code
1417 *
1418 * variant = 5*8alphanum ; registered variants
1419 * / (DIGIT 3alphanum)
1420 *
1421 * extension = singleton 1*("-" (2*8alphanum))
1422 *
1423 * ; Single alphanumerics
1424 * ; "x" reserved for private use
1425 * singleton = DIGIT ; 0 - 9
1426 * / %x41-57 ; A - W
1427 * / %x59-5A ; Y - Z
1428 * / %x61-77 ; a - w
1429 * / %x79-7A ; y - z
1430 *
1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432 * The parser below doesn't try to cope with extension or privateuse
1433 * that could be added but that's not interoperable anyway
1434 *
1435 * Returns 1 if correct 0 otherwise
1436 **/
1437 int
xmlCheckLanguageID(const xmlChar * lang)1438 xmlCheckLanguageID(const xmlChar * lang)
1439 {
1440 const xmlChar *cur = lang, *nxt;
1441
1442 if (cur == NULL)
1443 return (0);
1444 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1445 ((cur[0] == 'I') && (cur[1] == '-')) ||
1446 ((cur[0] == 'x') && (cur[1] == '-')) ||
1447 ((cur[0] == 'X') && (cur[1] == '-'))) {
1448 /*
1449 * Still allow IANA code and user code which were coming
1450 * from the previous version of the XML-1.0 specification
1451 * it's deprecated but we should not fail
1452 */
1453 cur += 2;
1454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1455 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456 cur++;
1457 return(cur[0] == 0);
1458 }
1459 nxt = cur;
1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462 nxt++;
1463 if (nxt - cur >= 4) {
1464 /*
1465 * Reserved
1466 */
1467 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 return(0);
1469 return(1);
1470 }
1471 if (nxt - cur < 2)
1472 return(0);
1473 /* we got an ISO 639 code */
1474 if (nxt[0] == 0)
1475 return(1);
1476 if (nxt[0] != '-')
1477 return(0);
1478
1479 nxt++;
1480 cur = nxt;
1481 /* now we can have extlang or script or region or variant */
1482 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483 goto region_m49;
1484
1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487 nxt++;
1488 if (nxt - cur == 4)
1489 goto script;
1490 if (nxt - cur == 2)
1491 goto region;
1492 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493 goto variant;
1494 if (nxt - cur != 3)
1495 return(0);
1496 /* we parsed an extlang */
1497 if (nxt[0] == 0)
1498 return(1);
1499 if (nxt[0] != '-')
1500 return(0);
1501
1502 nxt++;
1503 cur = nxt;
1504 /* now we can have script or region or variant */
1505 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506 goto region_m49;
1507
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511 if (nxt - cur == 2)
1512 goto region;
1513 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514 goto variant;
1515 if (nxt - cur != 4)
1516 return(0);
1517 /* we parsed a script */
1518 script:
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533
1534 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535 goto variant;
1536 if (nxt - cur != 2)
1537 return(0);
1538 /* we parsed a region */
1539 region:
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can just have a variant */
1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550 nxt++;
1551
1552 if ((nxt - cur < 5) || (nxt - cur > 8))
1553 return(0);
1554
1555 /* we parsed a variant */
1556 variant:
1557 if (nxt[0] == 0)
1558 return(1);
1559 if (nxt[0] != '-')
1560 return(0);
1561 /* extensions and private use subtags not checked */
1562 return (1);
1563
1564 region_m49:
1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567 nxt += 3;
1568 goto region;
1569 }
1570 return(0);
1571 }
1572
1573 /************************************************************************
1574 * *
1575 * Parser stacks related functions and macros *
1576 * *
1577 ************************************************************************/
1578
1579 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580 const xmlChar ** str);
1581
1582 #ifdef SAX2
1583 /**
1584 * nsPush:
1585 * @ctxt: an XML parser context
1586 * @prefix: the namespace prefix or NULL
1587 * @URL: the namespace name
1588 *
1589 * Pushes a new parser namespace on top of the ns stack
1590 *
1591 * Returns -1 in case of error, -2 if the namespace should be discarded
1592 * and the index in the stack otherwise.
1593 */
1594 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1595 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596 {
1597 if (ctxt->options & XML_PARSE_NSCLEAN) {
1598 int i;
1599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1600 if (ctxt->nsTab[i] == prefix) {
1601 /* in scope */
1602 if (ctxt->nsTab[i + 1] == URL)
1603 return(-2);
1604 /* out of scope keep it */
1605 break;
1606 }
1607 }
1608 }
1609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 ctxt->nsMax = 10;
1611 ctxt->nsNr = 0;
1612 ctxt->nsTab = (const xmlChar **)
1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 if (ctxt->nsTab == NULL) {
1615 xmlErrMemory(ctxt, NULL);
1616 ctxt->nsMax = 0;
1617 return (-1);
1618 }
1619 } else if (ctxt->nsNr >= ctxt->nsMax) {
1620 const xmlChar ** tmp;
1621 ctxt->nsMax *= 2;
1622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624 if (tmp == NULL) {
1625 xmlErrMemory(ctxt, NULL);
1626 ctxt->nsMax /= 2;
1627 return (-1);
1628 }
1629 ctxt->nsTab = tmp;
1630 }
1631 ctxt->nsTab[ctxt->nsNr++] = prefix;
1632 ctxt->nsTab[ctxt->nsNr++] = URL;
1633 return (ctxt->nsNr);
1634 }
1635 /**
1636 * nsPop:
1637 * @ctxt: an XML parser context
1638 * @nr: the number to pop
1639 *
1640 * Pops the top @nr parser prefix/namespace from the ns stack
1641 *
1642 * Returns the number of namespaces removed
1643 */
1644 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1645 nsPop(xmlParserCtxtPtr ctxt, int nr)
1646 {
1647 int i;
1648
1649 if (ctxt->nsTab == NULL) return(0);
1650 if (ctxt->nsNr < nr) {
1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652 nr = ctxt->nsNr;
1653 }
1654 if (ctxt->nsNr <= 0)
1655 return (0);
1656
1657 for (i = 0;i < nr;i++) {
1658 ctxt->nsNr--;
1659 ctxt->nsTab[ctxt->nsNr] = NULL;
1660 }
1661 return(nr);
1662 }
1663 #endif
1664
1665 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1666 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667 const xmlChar **atts;
1668 int *attallocs;
1669 int maxatts;
1670
1671 if (ctxt->atts == NULL) {
1672 maxatts = 55; /* allow for 10 attrs by default */
1673 atts = (const xmlChar **)
1674 xmlMalloc(maxatts * sizeof(xmlChar *));
1675 if (atts == NULL) goto mem_error;
1676 ctxt->atts = atts;
1677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 if (attallocs == NULL) goto mem_error;
1679 ctxt->attallocs = attallocs;
1680 ctxt->maxatts = maxatts;
1681 } else if (nr + 5 > ctxt->maxatts) {
1682 maxatts = (nr + 5) * 2;
1683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 maxatts * sizeof(const xmlChar *));
1685 if (atts == NULL) goto mem_error;
1686 ctxt->atts = atts;
1687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 (maxatts / 5) * sizeof(int));
1689 if (attallocs == NULL) goto mem_error;
1690 ctxt->attallocs = attallocs;
1691 ctxt->maxatts = maxatts;
1692 }
1693 return(ctxt->maxatts);
1694 mem_error:
1695 xmlErrMemory(ctxt, NULL);
1696 return(-1);
1697 }
1698
1699 /**
1700 * inputPush:
1701 * @ctxt: an XML parser context
1702 * @value: the parser input
1703 *
1704 * Pushes a new parser input on top of the input stack
1705 *
1706 * Returns -1 in case of error, the index in the stack otherwise
1707 */
1708 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1709 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710 {
1711 if ((ctxt == NULL) || (value == NULL))
1712 return(-1);
1713 if (ctxt->inputNr >= ctxt->inputMax) {
1714 ctxt->inputMax *= 2;
1715 ctxt->inputTab =
1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717 ctxt->inputMax *
1718 sizeof(ctxt->inputTab[0]));
1719 if (ctxt->inputTab == NULL) {
1720 xmlErrMemory(ctxt, NULL);
1721 xmlFreeInputStream(value);
1722 ctxt->inputMax /= 2;
1723 value = NULL;
1724 return (-1);
1725 }
1726 }
1727 ctxt->inputTab[ctxt->inputNr] = value;
1728 ctxt->input = value;
1729 return (ctxt->inputNr++);
1730 }
1731 /**
1732 * inputPop:
1733 * @ctxt: an XML parser context
1734 *
1735 * Pops the top parser input from the input stack
1736 *
1737 * Returns the input just removed
1738 */
1739 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1740 inputPop(xmlParserCtxtPtr ctxt)
1741 {
1742 xmlParserInputPtr ret;
1743
1744 if (ctxt == NULL)
1745 return(NULL);
1746 if (ctxt->inputNr <= 0)
1747 return (NULL);
1748 ctxt->inputNr--;
1749 if (ctxt->inputNr > 0)
1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751 else
1752 ctxt->input = NULL;
1753 ret = ctxt->inputTab[ctxt->inputNr];
1754 ctxt->inputTab[ctxt->inputNr] = NULL;
1755 return (ret);
1756 }
1757 /**
1758 * nodePush:
1759 * @ctxt: an XML parser context
1760 * @value: the element node
1761 *
1762 * Pushes a new element node on top of the node stack
1763 *
1764 * Returns -1 in case of error, the index in the stack otherwise
1765 */
1766 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1767 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768 {
1769 if (ctxt == NULL) return(0);
1770 if (ctxt->nodeNr >= ctxt->nodeMax) {
1771 xmlNodePtr *tmp;
1772
1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774 ctxt->nodeMax * 2 *
1775 sizeof(ctxt->nodeTab[0]));
1776 if (tmp == NULL) {
1777 xmlErrMemory(ctxt, NULL);
1778 return (-1);
1779 }
1780 ctxt->nodeTab = tmp;
1781 ctxt->nodeMax *= 2;
1782 }
1783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1787 xmlParserMaxDepth);
1788 xmlHaltParser(ctxt);
1789 return(-1);
1790 }
1791 ctxt->nodeTab[ctxt->nodeNr] = value;
1792 ctxt->node = value;
1793 return (ctxt->nodeNr++);
1794 }
1795
1796 /**
1797 * nodePop:
1798 * @ctxt: an XML parser context
1799 *
1800 * Pops the top element node from the node stack
1801 *
1802 * Returns the node just removed
1803 */
1804 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1805 nodePop(xmlParserCtxtPtr ctxt)
1806 {
1807 xmlNodePtr ret;
1808
1809 if (ctxt == NULL) return(NULL);
1810 if (ctxt->nodeNr <= 0)
1811 return (NULL);
1812 ctxt->nodeNr--;
1813 if (ctxt->nodeNr > 0)
1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815 else
1816 ctxt->node = NULL;
1817 ret = ctxt->nodeTab[ctxt->nodeNr];
1818 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1819 return (ret);
1820 }
1821
1822 #ifdef LIBXML_PUSH_ENABLED
1823 /**
1824 * nameNsPush:
1825 * @ctxt: an XML parser context
1826 * @value: the element name
1827 * @prefix: the element prefix
1828 * @URI: the element namespace name
1829 *
1830 * Pushes a new element name/prefix/URL on top of the name stack
1831 *
1832 * Returns -1 in case of error, the index in the stack otherwise
1833 */
1834 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1835 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837 {
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 void **tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1848 }
1849 ctxt->nameTab = tmp;
1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax * 3 *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->pushTab = tmp2;
1858 }
1859 ctxt->nameTab[ctxt->nameNr] = value;
1860 ctxt->name = value;
1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
1864 return (ctxt->nameNr++);
1865 mem_error:
1866 xmlErrMemory(ctxt, NULL);
1867 return (-1);
1868 }
1869 /**
1870 * nameNsPop:
1871 * @ctxt: an XML parser context
1872 *
1873 * Pops the top element/prefix/URI name from the name stack
1874 *
1875 * Returns the name just removed
1876 */
1877 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1878 nameNsPop(xmlParserCtxtPtr ctxt)
1879 {
1880 const xmlChar *ret;
1881
1882 if (ctxt->nameNr <= 0)
1883 return (NULL);
1884 ctxt->nameNr--;
1885 if (ctxt->nameNr > 0)
1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887 else
1888 ctxt->name = NULL;
1889 ret = ctxt->nameTab[ctxt->nameNr];
1890 ctxt->nameTab[ctxt->nameNr] = NULL;
1891 return (ret);
1892 }
1893 #endif /* LIBXML_PUSH_ENABLED */
1894
1895 /**
1896 * namePush:
1897 * @ctxt: an XML parser context
1898 * @value: the element name
1899 *
1900 * Pushes a new element name on top of the name stack
1901 *
1902 * Returns -1 in case of error, the index in the stack otherwise
1903 */
1904 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1905 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1906 {
1907 if (ctxt == NULL) return (-1);
1908
1909 if (ctxt->nameNr >= ctxt->nameMax) {
1910 const xmlChar * *tmp;
1911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1912 ctxt->nameMax * 2 *
1913 sizeof(ctxt->nameTab[0]));
1914 if (tmp == NULL) {
1915 goto mem_error;
1916 }
1917 ctxt->nameTab = tmp;
1918 ctxt->nameMax *= 2;
1919 }
1920 ctxt->nameTab[ctxt->nameNr] = value;
1921 ctxt->name = value;
1922 return (ctxt->nameNr++);
1923 mem_error:
1924 xmlErrMemory(ctxt, NULL);
1925 return (-1);
1926 }
1927 /**
1928 * namePop:
1929 * @ctxt: an XML parser context
1930 *
1931 * Pops the top element name from the name stack
1932 *
1933 * Returns the name just removed
1934 */
1935 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1936 namePop(xmlParserCtxtPtr ctxt)
1937 {
1938 const xmlChar *ret;
1939
1940 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941 return (NULL);
1942 ctxt->nameNr--;
1943 if (ctxt->nameNr > 0)
1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945 else
1946 ctxt->name = NULL;
1947 ret = ctxt->nameTab[ctxt->nameNr];
1948 ctxt->nameTab[ctxt->nameNr] = NULL;
1949 return (ret);
1950 }
1951
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to record
 *
 * Pushes @val on top of the space stack and points ctxt->space at the new
 * top entry. The stack is doubled when full.
 *
 * Returns -1 in case of allocation failure, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* double the capacity, rolling it back on failure */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = ctxt->spaceNr;
    ctxt->spaceTab[top] = val;
    ctxt->space = &ctxt->spaceTab[top];
    ctxt->spaceNr = top + 1;
    return(top);
}
1970
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value of the space stack. ctxt->space is moved to the entry
 * below, or to the first slot of the table when the stack gets empty, and
 * the released slot is reset to -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    /* never leave ctxt->space dangling: fall back on slot 0 */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1983
1984 /*
1985 * Macros for accessing the content. Those should be used only by the parser,
1986 * and not exported.
1987 *
1988 * Dirty macros, i.e. one often need to make assumption on the context to
1989 * use them
1990 *
1991 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1992 * To be used with extreme caution since operations consuming
1993 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to compare
 *           to ASCII values, otherwise it would break when running with
 *           UTF-8 encoding.
1998 * RAW same as CUR but in the input buffer, bypass any token
1999 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
2006 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2007 *
2008 * NEXT Skip to the next character, this does the proper decoding
2009 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
2010 * NEXTL(l) Skip the current unicode character of l xmlChars long.
2011 * CUR_CHAR(l) returns the current unicode character (int), set l
2012 * to the number of xmlChars used for the encoding [0-5].
2013 * CUR_SCHAR same but operate on a string instead of the context
2014 * COPY_BUF copy the current unicode char to the target buffer, increment
2015 * the index
2016 * GROW, SHRINK handling of input buffers
2017 */
2018
/*
 * Accessors on the current input. All of them expand to expressions using
 * a variable named ctxt, which must be in scope where they are used.
 * RAW and CUR expand to the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s, read as
 * unsigned char, equal c1 ... cn in that order. Each macro is built on the
 * previous one, and evaluation stops at the first mismatch because the
 * comparisons are chained with &&. Note that s is substituted several
 * times and is not parenthesized in the expansion.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
2042
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character and column counters, then ask for more input when the new
 * position holds a 0 byte. Lines are not tracked. val is evaluated three
 * times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP but one byte at a time, so that line and column
 * are kept right when newlines are crossed. val is parenthesized in the
 * loop bound so that any expression can be passed; it is evaluated on
 * every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061
/*
 * SHRINK: outside of progressive mode, call xmlSHRINK() when more than
 * 2 * INPUT_CHUNK bytes lie between the buffer base and the current
 * position while fewer than 2 * INPUT_CHUNK bytes remain before the end.
 * NOTE: this expands to a bare `if` statement which already ends with a
 * semicolon, so it must not be used as the unbraced body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2066
/*
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Shrink the current input buffer, then ask for more data when the
 * current position holds a 0 byte afterwards.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    if (*ctxt->input->cur != 0)
        return;
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2072
/*
 * GROW: outside of progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the input buffer.
 * NOTE: this expands to a bare `if` statement which already ends with a
 * semicolon, so it must not be used as the unbraced body of an if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2076
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Read more data into the current input buffer.
 *
 * Unless XML_PARSE_HUGE is set, the call fails with a fatal error and halts
 * the parser when more than XML_MAX_LOOKUP_LIMIT bytes are buffered before
 * or after the current position; the limit only applies to inputs having a
 * buffer whose read callback is not xmlInputReadCallbackNop.
 * The parser is also halted if the current position ends up outside of the
 * buffer after growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    unsigned long ahead = ctxt->input->end - ctxt->input->cur;
    unsigned long behind = ctxt->input->cur - ctxt->input->base;
    int overLimit;

    overLimit = ((ahead > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
                 (behind > (unsigned long) XML_MAX_LOOKUP_LIMIT));

    if (overLimit &&
        ((ctxt->input->buf) &&
         (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    /* sanity check on the position once the buffer has been updated */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    /* still on a 0 byte: try once more */
    if (ctxt->input->cur == NULL)
        return;
    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2100
/* Shorthands for the function calls below, applied to the ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, incrementing the column and character
 * counters, and ask for more input when the new position holds a 0 byte.
 * The line counter is not touched. Expands to a brace block, not to a
 * do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long. The
 * line/column position is updated from the byte under the cursor before
 * moving. Neither nbChars nor the input buffer is touched.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/* l is passed by address to the underlying function. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append v to buffer b at index i and advance i; a
 * single byte is stored directly when l is 1, otherwise the copy goes
 * through xmlCopyCharMultiByte(). Expands to a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2126
2127 /**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2138 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2139 int res = 0;
2140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
2145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
2147 /*
2148 * if we are in the document content, go really fast
2149 */
2150 cur = ctxt->input->cur;
2151 while (IS_BLANK_CH(*cur)) {
2152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
2154 } else {
2155 ctxt->input->col++;
2156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
2167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2171 NEXT;
2172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
2186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
2195 }
2196 }
2197 return(res);
2198 }
2199
2200 /************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206 /**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2216 xmlPopInput(xmlParserCtxtPtr ctxt) {
2217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
2221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
2225 xmlFreeInputStream(inputPop(ctxt));
2226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2228 return(CUR);
2229 }
2230
2231 /**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
2238 * Returns -1 in case of error or the index in the input stack
2239 */
2240 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2241 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242 int ret;
2243 if (input == NULL) return(-1);
2244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
2253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254 (ctxt->inputNr > 1024)) {
2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256 while (ctxt->inputNr > 1)
2257 xmlFreeInputStream(inputPop(ctxt));
2258 return(-1);
2259 }
2260 ret = inputPush(ctxt, input);
2261 if (ctxt->instate == XML_PARSER_EOF)
2262 return(-1);
2263 GROW;
2264 return(ret);
2265 }
2266
2267 /**
2268 * xmlParseCharRef:
2269 * @ctxt: an XML parser context
2270 *
2271 * parse Reference declarations
2272 *
2273 * [66] CharRef ::= '&#' [0-9]+ ';' |
2274 * '&#x' [0-9a-fA-F]+ ';'
2275 *
2276 * [ WFC: Legal Character ]
2277 * Characters referred to using character references must match the
2278 * production for Char.
2279 *
2280 * Returns the value parsed (as an int), 0 in case of error
2281 */
2282 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2283 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2284 unsigned int val = 0;
2285 int count = 0;
2286 unsigned int outofrange = 0;
2287
2288 /*
2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290 */
2291 if ((RAW == '&') && (NXT(1) == '#') &&
2292 (NXT(2) == 'x')) {
2293 SKIP(3);
2294 GROW;
2295 while (RAW != ';') { /* loop blocked by count */
2296 if (count++ > 20) {
2297 count = 0;
2298 GROW;
2299 if (ctxt->instate == XML_PARSER_EOF)
2300 return(0);
2301 }
2302 if ((RAW >= '0') && (RAW <= '9'))
2303 val = val * 16 + (CUR - '0');
2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 val = val * 16 + (CUR - 'a') + 10;
2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 val = val * 16 + (CUR - 'A') + 10;
2308 else {
2309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2310 val = 0;
2311 break;
2312 }
2313 if (val > 0x10FFFF)
2314 outofrange = val;
2315
2316 NEXT;
2317 count++;
2318 }
2319 if (RAW == ';') {
2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2321 ctxt->input->col++;
2322 ctxt->nbChars ++;
2323 ctxt->input->cur++;
2324 }
2325 } else if ((RAW == '&') && (NXT(1) == '#')) {
2326 SKIP(2);
2327 GROW;
2328 while (RAW != ';') { /* loop blocked by count */
2329 if (count++ > 20) {
2330 count = 0;
2331 GROW;
2332 if (ctxt->instate == XML_PARSER_EOF)
2333 return(0);
2334 }
2335 if ((RAW >= '0') && (RAW <= '9'))
2336 val = val * 10 + (CUR - '0');
2337 else {
2338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2339 val = 0;
2340 break;
2341 }
2342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
2345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350 ctxt->input->col++;
2351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else {
2355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2356 }
2357
2358 /*
2359 * [ WFC: Legal Character ]
2360 * Characters referred to using character references must match the
2361 * production for Char.
2362 */
2363 if ((IS_CHAR(val) && (outofrange == 0))) {
2364 return(val);
2365 } else {
2366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367 "xmlParseCharRef: invalid xmlChar value %d\n",
2368 val);
2369 }
2370 return(0);
2371 }
2372
2373 /**
2374 * xmlParseStringCharRef:
2375 * @ctxt: an XML parser context
2376 * @str: a pointer to an index in the string
2377 *
2378 * parse Reference declarations, variant parsing from a string rather
2379 * than an an input flow.
2380 *
2381 * [66] CharRef ::= '&#' [0-9]+ ';' |
2382 * '&#x' [0-9a-fA-F]+ ';'
2383 *
2384 * [ WFC: Legal Character ]
2385 * Characters referred to using character references must match the
2386 * production for Char.
2387 *
2388 * Returns the value parsed (as an int), 0 in case of error, str will be
2389 * updated to the current value of the index
2390 */
2391 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2392 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393 const xmlChar *ptr;
2394 xmlChar cur;
2395 unsigned int val = 0;
2396 unsigned int outofrange = 0;
2397
2398 if ((str == NULL) || (*str == NULL)) return(0);
2399 ptr = *str;
2400 cur = *ptr;
2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 ptr += 3;
2403 cur = *ptr;
2404 while (cur != ';') { /* Non input consuming loop */
2405 if ((cur >= '0') && (cur <= '9'))
2406 val = val * 16 + (cur - '0');
2407 else if ((cur >= 'a') && (cur <= 'f'))
2408 val = val * 16 + (cur - 'a') + 10;
2409 else if ((cur >= 'A') && (cur <= 'F'))
2410 val = val * 16 + (cur - 'A') + 10;
2411 else {
2412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2413 val = 0;
2414 break;
2415 }
2416 if (val > 0x10FFFF)
2417 outofrange = val;
2418
2419 ptr++;
2420 cur = *ptr;
2421 }
2422 if (cur == ';')
2423 ptr++;
2424 } else if ((cur == '&') && (ptr[1] == '#')){
2425 ptr += 2;
2426 cur = *ptr;
2427 while (cur != ';') { /* Non input consuming loops */
2428 if ((cur >= '0') && (cur <= '9'))
2429 val = val * 10 + (cur - '0');
2430 else {
2431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2432 val = 0;
2433 break;
2434 }
2435 if (val > 0x10FFFF)
2436 outofrange = val;
2437
2438 ptr++;
2439 cur = *ptr;
2440 }
2441 if (cur == ';')
2442 ptr++;
2443 } else {
2444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2445 return(0);
2446 }
2447 *str = ptr;
2448
2449 /*
2450 * [ WFC: Legal Character ]
2451 * Characters referred to using character references must match the
2452 * production for Char.
2453 */
2454 if ((IS_CHAR(val) && (outofrange == 0))) {
2455 return(val);
2456 } else {
2457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 val);
2460 }
2461 return(0);
2462 }
2463
2464 /**
2465 * xmlParserHandlePEReference:
2466 * @ctxt: the parser context
2467 *
2468 * [69] PEReference ::= '%' Name ';'
2469 *
2470 * [ WFC: No Recursion ]
2471 * A parsed entity must not contain a recursive
2472 * reference to itself, either directly or indirectly.
2473 *
2474 * [ WFC: Entity Declared ]
2475 * In a document without any DTD, a document with only an internal DTD
2476 * subset which contains no parameter entity references, or a document
2477 * with "standalone='yes'", ... ... The declaration of a parameter
2478 * entity must precede any reference to it...
2479 *
2480 * [ VC: Entity Declared ]
2481 * In a document with an external subset or external parameter entities
2482 * with "standalone='no'", ... ... The declaration of a parameter entity
2483 * must precede any reference to it...
2484 *
2485 * [ WFC: In DTD ]
2486 * Parameter-entity references may only appear in the DTD.
2487 * NOTE: misleading but this is handled.
2488 *
2489 * A PEReference may have been detected in the current input stream
2490 * the handling is done accordingly to
2491 * http://www.w3.org/TR/REC-xml#entproc
2492 * i.e.
2493 * - Included in literal in entity values
2494 * - Included as Parameter Entity reference within DTDs
2495 */
2496 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2497 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2498 switch(ctxt->instate) {
2499 case XML_PARSER_CDATA_SECTION:
2500 return;
2501 case XML_PARSER_COMMENT:
2502 return;
2503 case XML_PARSER_START_TAG:
2504 return;
2505 case XML_PARSER_END_TAG:
2506 return;
2507 case XML_PARSER_EOF:
2508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2509 return;
2510 case XML_PARSER_PROLOG:
2511 case XML_PARSER_START:
2512 case XML_PARSER_MISC:
2513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2514 return;
2515 case XML_PARSER_ENTITY_DECL:
2516 case XML_PARSER_CONTENT:
2517 case XML_PARSER_ATTRIBUTE_VALUE:
2518 case XML_PARSER_PI:
2519 case XML_PARSER_SYSTEM_LITERAL:
2520 case XML_PARSER_PUBLIC_LITERAL:
2521 /* we just ignore it there */
2522 return;
2523 case XML_PARSER_EPILOG:
2524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2525 return;
2526 case XML_PARSER_ENTITY_VALUE:
2527 /*
2528 * NOTE: in the case of entity values, we don't do the
2529 * substitution here since we need the literal
2530 * entity value to be able to save the internal
2531 * subset of the document.
2532 * This will be handled by xmlStringDecodeEntities
2533 */
2534 return;
2535 case XML_PARSER_DTD:
2536 /*
2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 * In the internal DTD subset, parameter-entity references
2539 * can occur only where markup declarations can occur, not
2540 * within markup declarations.
2541 * In that case this is handled in xmlParseMarkupDecl
2542 */
2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 return;
2545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2546 return;
2547 break;
2548 case XML_PARSER_IGNORE:
2549 return;
2550 }
2551
2552 xmlParsePEReference(ctxt);
2553 }
2554
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t variable in scope.
 * The new size is twice the old one plus n; n is parenthesized so
 * that any argument expression keeps its own value.
 * mem_error: a label of that name is expected in the calling function
 * to handle size overflow and memory allocation failures. On failure
 * buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) { \
    xmlChar *tmp; \
    size_t new_size = buffer##_size * 2 + (n); \
    if (new_size < buffer##_size) goto mem_error; \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
    if (tmp == NULL) goto mem_error; \
    buffer = tmp; \
    buffer##_size = new_size; \
}
2569
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and performs the adequate
 * substitutions: character references are always decoded, general
 * entity references are expanded when @what has XML_SUBSTITUTE_REF and
 * parameter-entity references when it has XML_SUBSTITUTE_PEREF.
 * Decoding stops at the first 0 character, at @len bytes, or at one of
 * the end markers.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments, on
 *      excessive nesting, on allocation failure and on decoding errors.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                           int what, xmlChar end, xmlChar end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;   /* name required by growBuffer(buffer, ...) */
    size_t nbchars = 0;       /* bytes written to buffer so far */

    xmlChar *current = NULL;
    xmlChar *rep = NULL;      /* expansion of a nested entity, freed here */
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Nesting guard: this function recurses through
     * xmlStringDecodeEntities() with ctxt->depth incremented. More than
     * 40 levels (1024 with XML_PARSE_HUGE) is reported as an entity loop.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        /* Redundant with the loop condition above. */
        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* Character reference; xmlParseStringCharRef advances str. */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* l == 0 selects the xmlCopyCharMultiByte() path of COPY_BUF. */
            COPY_BUF(0,buffer,nbchars,val);
            /* Grow as soon as less than XML_PARSER_BUFFER_SIZE is left. */
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* Predefined entity: copy the first byte of its content. */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* Entity with content: decode it recursively, then append. */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* Re-check the expansion budget before each growth. */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* Entity without content: re-emit the reference as "&name;". */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEreferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * If ent->content is still NULL here,
                 * xmlStringDecodeEntities() returns NULL for a NULL string
                 * and the whole decoding fails through int_error.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character: copy it and step over its l bytes. */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

/* Allocation failures are reported, then share the cleanup below. */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2760
2761 /**
2762 * xmlStringDecodeEntities:
2763 * @ctxt: the parser context
2764 * @str: the input string
2765 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766 * @end: an end marker xmlChar, 0 if none
2767 * @end2: an end marker xmlChar, 0 if none
2768 * @end3: an end marker xmlChar, 0 if none
2769 *
2770 * Takes a entity string content and process to do the adequate substitutions.
2771 *
2772 * [67] Reference ::= EntityRef | CharRef
2773 *
2774 * [69] PEReference ::= '%' Name ';'
2775 *
2776 * Returns A newly allocated string with the substitution done. The caller
2777 * must deallocate it !
2778 */
2779 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2780 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 xmlChar end, xmlChar end2, xmlChar end3) {
2782 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2783 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784 end, end2, end3));
2785 }
2786
2787 /************************************************************************
2788 * *
2789 * Commodity functions, cleanup needed ? *
2790 * *
2791 ************************************************************************/
2792
2793 /**
2794 * areBlanks:
2795 * @ctxt: an XML parser context
2796 * @str: a xmlChar *
2797 * @len: the size of @str
2798 * @blank_chars: we know the chars are blanks
2799 *
2800 * Is this a sequence of blank chars that one can ignore ?
2801 *
2802 * Returns 1 if ignorable 0 otherwise.
2803 */
2804
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2805 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int blank_chars) {
2807 int i, ret;
2808 xmlNodePtr lastChild;
2809
2810 /*
2811 * Don't spend time trying to differentiate them, the same callback is
2812 * used !
2813 */
2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2815 return(0);
2816
2817 /*
2818 * Check for xml:space value.
2819 */
2820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821 (*(ctxt->space) == -2))
2822 return(0);
2823
2824 /*
2825 * Check that the string is made of blanks
2826 */
2827 if (blank_chars == 0) {
2828 for (i = 0;i < len;i++)
2829 if (!(IS_BLANK_CH(str[i]))) return(0);
2830 }
2831
2832 /*
2833 * Look if the element is mixed content in the DTD if available
2834 */
2835 if (ctxt->node == NULL) return(0);
2836 if (ctxt->myDoc != NULL) {
2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838 if (ret == 0) return(1);
2839 if (ret == 1) return(0);
2840 }
2841
2842 /*
2843 * Otherwise, heuristic :-\
2844 */
2845 if ((RAW != '<') && (RAW != 0xD)) return(0);
2846 if ((ctxt->node->children == NULL) &&
2847 (RAW == '<') && (NXT(1) == '/')) return(0);
2848
2849 lastChild = xmlGetLastChild(ctxt->node);
2850 if (lastChild == NULL) {
2851 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852 (ctxt->node->content != NULL)) return(0);
2853 } else if (xmlNodeIsText(lastChild))
2854 return(0);
2855 else if ((ctxt->node->children != NULL) &&
2856 (xmlNodeIsText(ctxt->node->children)))
2857 return(0);
2858 return(1);
2859 }
2860
2861 /************************************************************************
2862 * *
2863 * Extra stuff for namespace support *
2864 * Relates to http://www.w3.org/TR/WD-xml-names *
2865 * *
2866 ************************************************************************/
2867
2868 /**
2869 * xmlSplitQName:
2870 * @ctxt: an XML parser context
2871 * @name: an XML parser context
2872 * @prefix: a xmlChar **
2873 *
2874 * parse an UTF8 encoded XML qualified name string
2875 *
2876 * [NS 5] QName ::= (Prefix ':')? LocalPart
2877 *
2878 * [NS 6] Prefix ::= NCName
2879 *
2880 * [NS 7] LocalPart ::= NCName
2881 *
2882 * Returns the local part, and prefix is updated
2883 * to get the Prefix if any.
2884 */
2885
2886 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2887 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888 xmlChar buf[XML_MAX_NAMELEN + 5];
2889 xmlChar *buffer = NULL;
2890 int len = 0;
2891 int max = XML_MAX_NAMELEN;
2892 xmlChar *ret = NULL;
2893 const xmlChar *cur = name;
2894 int c;
2895
2896 if (prefix == NULL) return(NULL);
2897 *prefix = NULL;
2898
2899 if (cur == NULL) return(NULL);
2900
2901 #ifndef XML_XML_NAMESPACE
2902 /* xml: prefix is not really a namespace */
2903 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904 (cur[2] == 'l') && (cur[3] == ':'))
2905 return(xmlStrdup(name));
2906 #endif
2907
2908 /* nasty but well=formed */
2909 if (cur[0] == ':')
2910 return(xmlStrdup(name));
2911
2912 c = *cur++;
2913 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 buf[len++] = c;
2915 c = *cur++;
2916 }
2917 if (len >= max) {
2918 /*
2919 * Okay someone managed to make a huge name, so he's ready to pay
2920 * for the processing speed.
2921 */
2922 max = len * 2;
2923
2924 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2925 if (buffer == NULL) {
2926 xmlErrMemory(ctxt, NULL);
2927 return(NULL);
2928 }
2929 memcpy(buffer, buf, len);
2930 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 if (len + 10 > max) {
2932 xmlChar *tmp;
2933
2934 max *= 2;
2935 tmp = (xmlChar *) xmlRealloc(buffer,
2936 max * sizeof(xmlChar));
2937 if (tmp == NULL) {
2938 xmlFree(buffer);
2939 xmlErrMemory(ctxt, NULL);
2940 return(NULL);
2941 }
2942 buffer = tmp;
2943 }
2944 buffer[len++] = c;
2945 c = *cur++;
2946 }
2947 buffer[len] = 0;
2948 }
2949
2950 if ((c == ':') && (*cur == 0)) {
2951 if (buffer != NULL)
2952 xmlFree(buffer);
2953 *prefix = NULL;
2954 return(xmlStrdup(name));
2955 }
2956
2957 if (buffer == NULL)
2958 ret = xmlStrndup(buf, len);
2959 else {
2960 ret = buffer;
2961 buffer = NULL;
2962 max = XML_MAX_NAMELEN;
2963 }
2964
2965
2966 if (c == ':') {
2967 c = *cur;
2968 *prefix = ret;
2969 if (c == 0) {
2970 return(xmlStrndup(BAD_CAST "", 0));
2971 }
2972 len = 0;
2973
2974 /*
2975 * Check that the first character is proper to start
2976 * a new name
2977 */
2978 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 ((c >= 0x41) && (c <= 0x5A)) ||
2980 (c == '_') || (c == ':'))) {
2981 int l;
2982 int first = CUR_SCHAR(cur, l);
2983
2984 if (!IS_LETTER(first) && (first != '_')) {
2985 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2986 "Name %s is not XML Namespace compliant\n",
2987 name);
2988 }
2989 }
2990 cur++;
2991
2992 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
3002
3003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3004 if (buffer == NULL) {
3005 xmlErrMemory(ctxt, NULL);
3006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while (c != 0) { /* tested bigname2.xml */
3010 if (len + 10 > max) {
3011 xmlChar *tmp;
3012
3013 max *= 2;
3014 tmp = (xmlChar *) xmlRealloc(buffer,
3015 max * sizeof(xmlChar));
3016 if (tmp == NULL) {
3017 xmlErrMemory(ctxt, NULL);
3018 xmlFree(buffer);
3019 return(NULL);
3020 }
3021 buffer = tmp;
3022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
3028
3029 if (buffer == NULL)
3030 ret = xmlStrndup(buf, len);
3031 else {
3032 ret = buffer;
3033 }
3034 }
3035
3036 return(ret);
3037 }
3038
3039 /************************************************************************
3040 * *
3041 * The parser itself *
3042 * Relates to http://www.w3.org/TR/REC-xml *
3043 * *
3044 ************************************************************************/
3045
3046 /************************************************************************
3047 * *
3048 * Routines to parse Name, NCName and NmToken *
3049 * *
3050 ************************************************************************/
/*
 * Call counters, only compiled in when DEBUG is defined.
 * nbParseNameComplex is incremented by xmlParseNameComplex();
 * the others are presumably bumped by the correspondingly named
 * parsing routines -- verify against their definitions.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3059
/*
 * The two following functions reflect the change of accepted
 * characters for Name and NmToken made in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new
 * production [4a] of that revision. Note that with these rules the
 * macros used for the productions Letter, Digit, CombiningChar and
 * Extender are no longer needed.
 * Compatibility with the pre-Revision-5 parsing semantics is kept when
 * the XML_PARSE_OLD10 option is given to the parser.
 */
3070 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3071 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073 /*
3074 * Use the new checks of production [4] [4a] amd [5] of the
3075 * Update 5 of XML-1.0
3076 */
3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 (((c >= 'a') && (c <= 'z')) ||
3079 ((c >= 'A') && (c <= 'Z')) ||
3080 (c == '_') || (c == ':') ||
3081 ((c >= 0xC0) && (c <= 0xD6)) ||
3082 ((c >= 0xD8) && (c <= 0xF6)) ||
3083 ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 ((c >= 0x370) && (c <= 0x37D)) ||
3085 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 ((c >= 0x200C) && (c <= 0x200D)) ||
3087 ((c >= 0x2070) && (c <= 0x218F)) ||
3088 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 return(1);
3094 } else {
3095 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 return(1);
3097 }
3098 return(0);
3099 }
3100
3101 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3102 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104 /*
3105 * Use the new checks of production [4] [4a] amd [5] of the
3106 * Update 5 of XML-1.0
3107 */
3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 (((c >= 'a') && (c <= 'z')) ||
3110 ((c >= 'A') && (c <= 'Z')) ||
3111 ((c >= '0') && (c <= '9')) || /* !start */
3112 (c == '_') || (c == ':') ||
3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 ((c >= 0xC0) && (c <= 0xD6)) ||
3115 ((c >= 0xD8) && (c <= 0xF6)) ||
3116 ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 ((c >= 0x370) && (c <= 0x37D)) ||
3119 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 ((c >= 0x200C) && (c <= 0x200D)) ||
3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 ((c >= 0x2070) && (c <= 0x218F)) ||
3123 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 return(1);
3129 } else {
3130 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
3132 (c == '_') || (c == ':') ||
3133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))
3135 return(1);
3136 }
3137 return(0);
3138 }
3139
/*
 * Forward declaration of a static helper; its definition is not part
 * of this section of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3142
3143 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3144 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145 int len = 0, l;
3146 int c;
3147 int count = 0;
3148
3149 #ifdef DEBUG
3150 nbParseNameComplex++;
3151 #endif
3152
3153 /*
3154 * Handler for more complex cases
3155 */
3156 GROW;
3157 if (ctxt->instate == XML_PARSER_EOF)
3158 return(NULL);
3159 c = CUR_CHAR(l);
3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161 /*
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3164 */
3165 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166 (!(((c >= 'a') && (c <= 'z')) ||
3167 ((c >= 'A') && (c <= 'Z')) ||
3168 (c == '_') || (c == ':') ||
3169 ((c >= 0xC0) && (c <= 0xD6)) ||
3170 ((c >= 0xD8) && (c <= 0xF6)) ||
3171 ((c >= 0xF8) && (c <= 0x2FF)) ||
3172 ((c >= 0x370) && (c <= 0x37D)) ||
3173 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174 ((c >= 0x200C) && (c <= 0x200D)) ||
3175 ((c >= 0x2070) && (c <= 0x218F)) ||
3176 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181 return(NULL);
3182 }
3183 len += l;
3184 NEXTL(l);
3185 c = CUR_CHAR(l);
3186 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187 (((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 ((c >= '0') && (c <= '9')) || /* !start */
3190 (c == '_') || (c == ':') ||
3191 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192 ((c >= 0xC0) && (c <= 0xD6)) ||
3193 ((c >= 0xD8) && (c <= 0xF6)) ||
3194 ((c >= 0xF8) && (c <= 0x2FF)) ||
3195 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196 ((c >= 0x370) && (c <= 0x37D)) ||
3197 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198 ((c >= 0x200C) && (c <= 0x200D)) ||
3199 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200 ((c >= 0x2070) && (c <= 0x218F)) ||
3201 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 ((c >= 0x10000) && (c <= 0xEFFFF))
3206 )) {
3207 if (count++ > XML_PARSER_CHUNK_SIZE) {
3208 count = 0;
3209 GROW;
3210 if (ctxt->instate == XML_PARSER_EOF)
3211 return(NULL);
3212 }
3213 len += l;
3214 NEXTL(l);
3215 c = CUR_CHAR(l);
3216 }
3217 } else {
3218 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219 (!IS_LETTER(c) && (c != '_') &&
3220 (c != ':'))) {
3221 return(NULL);
3222 }
3223 len += l;
3224 NEXTL(l);
3225 c = CUR_CHAR(l);
3226
3227 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229 (c == '.') || (c == '-') ||
3230 (c == '_') || (c == ':') ||
3231 (IS_COMBINING(c)) ||
3232 (IS_EXTENDER(c)))) {
3233 if (count++ > XML_PARSER_CHUNK_SIZE) {
3234 count = 0;
3235 GROW;
3236 if (ctxt->instate == XML_PARSER_EOF)
3237 return(NULL);
3238 }
3239 len += l;
3240 NEXTL(l);
3241 c = CUR_CHAR(l);
3242 }
3243 }
3244 if ((len > XML_MAX_NAME_LENGTH) &&
3245 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247 return(NULL);
3248 }
3249 if (ctxt->input->cur - ctxt->input->base < len) {
3250 /*
3251 * There were a couple of bugs where PERefs lead to to a change
3252 * of the buffer. Check the buffer size to avoid passing an invalid
3253 * pointer to xmlDictLookup.
3254 */
3255 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3256 "unexpected change of input buffer");
3257 return (NULL);
3258 }
3259 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262 }
3263
3264 /**
3265 * xmlParseName:
3266 * @ctxt: an XML parser context
3267 *
3268 * parse an XML name.
3269 *
3270 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271 * CombiningChar | Extender
3272 *
3273 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274 *
3275 * [6] Names ::= Name (#x20 Name)*
3276 *
3277 * Returns the Name parsed or NULL
3278 */
3279
3280 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3281 xmlParseName(xmlParserCtxtPtr ctxt) {
3282 const xmlChar *in;
3283 const xmlChar *ret;
3284 int count = 0;
3285
3286 GROW;
3287
3288 #ifdef DEBUG
3289 nbParseName++;
3290 #endif
3291
3292 /*
3293 * Accelerator for simple ASCII names
3294 */
3295 in = ctxt->input->cur;
3296 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 ((*in >= 0x41) && (*in <= 0x5A)) ||
3298 (*in == '_') || (*in == ':')) {
3299 in++;
3300 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 ((*in >= 0x30) && (*in <= 0x39)) ||
3303 (*in == '_') || (*in == '-') ||
3304 (*in == ':') || (*in == '.'))
3305 in++;
3306 if ((*in > 0) && (*in < 0x80)) {
3307 count = in - ctxt->input->cur;
3308 if ((count > XML_MAX_NAME_LENGTH) &&
3309 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311 return(NULL);
3312 }
3313 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3314 ctxt->input->cur = in;
3315 ctxt->nbChars += count;
3316 ctxt->input->col += count;
3317 if (ret == NULL)
3318 xmlErrMemory(ctxt, NULL);
3319 return(ret);
3320 }
3321 }
3322 /* accelerator for special cases */
3323 return(xmlParseNameComplex(ctxt));
3324 }
3325
3326 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3327 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3328 int len = 0, l;
3329 int c;
3330 int count = 0;
3331 size_t startPosition = 0;
3332
3333 #ifdef DEBUG
3334 nbParseNCNameComplex++;
3335 #endif
3336
3337 /*
3338 * Handler for more complex cases
3339 */
3340 GROW;
3341 startPosition = CUR_PTR - BASE_PTR;
3342 c = CUR_CHAR(l);
3343 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345 return(NULL);
3346 }
3347
3348 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3350 if (count++ > XML_PARSER_CHUNK_SIZE) {
3351 if ((len > XML_MAX_NAME_LENGTH) &&
3352 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354 return(NULL);
3355 }
3356 count = 0;
3357 GROW;
3358 if (ctxt->instate == XML_PARSER_EOF)
3359 return(NULL);
3360 }
3361 len += l;
3362 NEXTL(l);
3363 c = CUR_CHAR(l);
3364 if (c == 0) {
3365 count = 0;
3366 /*
3367 * when shrinking to extend the buffer we really need to preserve
3368 * the part of the name we already parsed. Hence rolling back
3369 * by current lenght.
3370 */
3371 ctxt->input->cur -= l;
3372 GROW;
3373 if (ctxt->instate == XML_PARSER_EOF)
3374 return(NULL);
3375 ctxt->input->cur += l;
3376 c = CUR_CHAR(l);
3377 }
3378 }
3379 if ((len > XML_MAX_NAME_LENGTH) &&
3380 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382 return(NULL);
3383 }
3384 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3385 }
3386
3387 /**
3388 * xmlParseNCName:
3389 * @ctxt: an XML parser context
3390 * @len: length of the string parsed
3391 *
3392 * parse an XML name.
3393 *
3394 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395 * CombiningChar | Extender
3396 *
3397 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398 *
3399 * Returns the Name parsed or NULL
3400 */
3401
3402 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3403 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3404 const xmlChar *in, *e;
3405 const xmlChar *ret;
3406 int count = 0;
3407
3408 #ifdef DEBUG
3409 nbParseNCName++;
3410 #endif
3411
3412 /*
3413 * Accelerator for simple ASCII names
3414 */
3415 in = ctxt->input->cur;
3416 e = ctxt->input->end;
3417 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 (*in == '_')) && (in < e)) {
3420 in++;
3421 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 ((*in >= 0x30) && (*in <= 0x39)) ||
3424 (*in == '_') || (*in == '-') ||
3425 (*in == '.')) && (in < e))
3426 in++;
3427 if (in >= e)
3428 goto complex;
3429 if ((*in > 0) && (*in < 0x80)) {
3430 count = in - ctxt->input->cur;
3431 if ((count > XML_MAX_NAME_LENGTH) &&
3432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434 return(NULL);
3435 }
3436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 ctxt->input->cur = in;
3438 ctxt->nbChars += count;
3439 ctxt->input->col += count;
3440 if (ret == NULL) {
3441 xmlErrMemory(ctxt, NULL);
3442 }
3443 return(ret);
3444 }
3445 }
3446 complex:
3447 return(xmlParseNCNameComplex(ctxt));
3448 }
3449
3450 /**
3451 * xmlParseNameAndCompare:
3452 * @ctxt: an XML parser context
3453 *
3454 * parse an XML name and compares for match
3455 * (specialized for endtag parsing)
3456 *
3457 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458 * and the name for mismatch
3459 */
3460
3461 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3462 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3463 register const xmlChar *cmp = other;
3464 register const xmlChar *in;
3465 const xmlChar *ret;
3466
3467 GROW;
3468 if (ctxt->instate == XML_PARSER_EOF)
3469 return(NULL);
3470
3471 in = ctxt->input->cur;
3472 while (*in != 0 && *in == *cmp) {
3473 ++in;
3474 ++cmp;
3475 ctxt->input->col++;
3476 }
3477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3478 /* success */
3479 ctxt->input->cur = in;
3480 return (const xmlChar*) 1;
3481 }
3482 /* failure (or end of input buffer), check with full function */
3483 ret = xmlParseName (ctxt);
3484 /* strings coming from the dictionary direct compare possible */
3485 if (ret == other) {
3486 return (const xmlChar*) 1;
3487 }
3488 return ret;
3489 }
3490
3491 /**
3492 * xmlParseStringName:
3493 * @ctxt: an XML parser context
3494 * @str: a pointer to the string pointer (IN/OUT)
3495 *
3496 * parse an XML name.
3497 *
3498 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499 * CombiningChar | Extender
3500 *
3501 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502 *
3503 * [6] Names ::= Name (#x20 Name)*
3504 *
3505 * Returns the Name parsed or NULL. The @str pointer
3506 * is updated to the current location in the string.
3507 */
3508
3509 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3510 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511 xmlChar buf[XML_MAX_NAMELEN + 5];
3512 const xmlChar *cur = *str;
3513 int len = 0, l;
3514 int c;
3515
3516 #ifdef DEBUG
3517 nbParseStringName++;
3518 #endif
3519
3520 c = CUR_SCHAR(cur, l);
3521 if (!xmlIsNameStartChar(ctxt, c)) {
3522 return(NULL);
3523 }
3524
3525 COPY_BUF(l,buf,len,c);
3526 cur += l;
3527 c = CUR_SCHAR(cur, l);
3528 while (xmlIsNameChar(ctxt, c)) {
3529 COPY_BUF(l,buf,len,c);
3530 cur += l;
3531 c = CUR_SCHAR(cur, l);
3532 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 /*
3534 * Okay someone managed to make a huge name, so he's ready to pay
3535 * for the processing speed.
3536 */
3537 xmlChar *buffer;
3538 int max = len * 2;
3539
3540 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3541 if (buffer == NULL) {
3542 xmlErrMemory(ctxt, NULL);
3543 return(NULL);
3544 }
3545 memcpy(buffer, buf, len);
3546 while (xmlIsNameChar(ctxt, c)) {
3547 if (len + 10 > max) {
3548 xmlChar *tmp;
3549
3550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 xmlFree(buffer);
3554 return(NULL);
3555 }
3556 max *= 2;
3557 tmp = (xmlChar *) xmlRealloc(buffer,
3558 max * sizeof(xmlChar));
3559 if (tmp == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 xmlFree(buffer);
3562 return(NULL);
3563 }
3564 buffer = tmp;
3565 }
3566 COPY_BUF(l,buffer,len,c);
3567 cur += l;
3568 c = CUR_SCHAR(cur, l);
3569 }
3570 buffer[len] = 0;
3571 *str = cur;
3572 return(buffer);
3573 }
3574 }
3575 if ((len > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
3580 *str = cur;
3581 return(xmlStrndup(buf, len));
3582 }
3583
3584 /**
3585 * xmlParseNmtoken:
3586 * @ctxt: an XML parser context
3587 *
3588 * parse an XML Nmtoken.
3589 *
3590 * [7] Nmtoken ::= (NameChar)+
3591 *
3592 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3593 *
3594 * Returns the Nmtoken parsed or NULL
3595 */
3596
3597 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3598 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599 xmlChar buf[XML_MAX_NAMELEN + 5];
3600 int len = 0, l;
3601 int c;
3602 int count = 0;
3603
3604 #ifdef DEBUG
3605 nbParseNmToken++;
3606 #endif
3607
3608 GROW;
3609 if (ctxt->instate == XML_PARSER_EOF)
3610 return(NULL);
3611 c = CUR_CHAR(l);
3612
3613 while (xmlIsNameChar(ctxt, c)) {
3614 if (count++ > XML_PARSER_CHUNK_SIZE) {
3615 count = 0;
3616 GROW;
3617 }
3618 COPY_BUF(l,buf,len,c);
3619 NEXTL(l);
3620 c = CUR_CHAR(l);
3621 if (c == 0) {
3622 count = 0;
3623 GROW;
3624 if (ctxt->instate == XML_PARSER_EOF)
3625 return(NULL);
3626 c = CUR_CHAR(l);
3627 }
3628 if (len >= XML_MAX_NAMELEN) {
3629 /*
3630 * Okay someone managed to make a huge token, so he's ready to pay
3631 * for the processing speed.
3632 */
3633 xmlChar *buffer;
3634 int max = len * 2;
3635
3636 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3637 if (buffer == NULL) {
3638 xmlErrMemory(ctxt, NULL);
3639 return(NULL);
3640 }
3641 memcpy(buffer, buf, len);
3642 while (xmlIsNameChar(ctxt, c)) {
3643 if (count++ > XML_PARSER_CHUNK_SIZE) {
3644 count = 0;
3645 GROW;
3646 if (ctxt->instate == XML_PARSER_EOF) {
3647 xmlFree(buffer);
3648 return(NULL);
3649 }
3650 }
3651 if (len + 10 > max) {
3652 xmlChar *tmp;
3653
3654 if ((max > XML_MAX_NAME_LENGTH) &&
3655 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657 xmlFree(buffer);
3658 return(NULL);
3659 }
3660 max *= 2;
3661 tmp = (xmlChar *) xmlRealloc(buffer,
3662 max * sizeof(xmlChar));
3663 if (tmp == NULL) {
3664 xmlErrMemory(ctxt, NULL);
3665 xmlFree(buffer);
3666 return(NULL);
3667 }
3668 buffer = tmp;
3669 }
3670 COPY_BUF(l,buffer,len,c);
3671 NEXTL(l);
3672 c = CUR_CHAR(l);
3673 }
3674 buffer[len] = 0;
3675 return(buffer);
3676 }
3677 }
3678 if (len == 0)
3679 return(NULL);
3680 if ((len > XML_MAX_NAME_LENGTH) &&
3681 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683 return(NULL);
3684 }
3685 return(xmlStrndup(buf, len));
3686 }
3687
3688 /**
3689 * xmlParseEntityValue:
3690 * @ctxt: an XML parser context
3691 * @orig: if non-NULL store a copy of the original entity value
3692 *
3693 * parse a value for ENTITY declarations
3694 *
3695 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696 * "'" ([^%&'] | PEReference | Reference)* "'"
3697 *
3698 * Returns the EntityValue parsed with reference substituted or NULL
3699 */
3700
3701 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3702 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703 xmlChar *buf = NULL;
3704 int len = 0;
3705 int size = XML_PARSER_BUFFER_SIZE;
3706 int c, l;
3707 xmlChar stop;
3708 xmlChar *ret = NULL;
3709 const xmlChar *cur = NULL;
3710 xmlParserInputPtr input;
3711
3712 if (RAW == '"') stop = '"';
3713 else if (RAW == '\'') stop = '\'';
3714 else {
3715 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3716 return(NULL);
3717 }
3718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3719 if (buf == NULL) {
3720 xmlErrMemory(ctxt, NULL);
3721 return(NULL);
3722 }
3723
3724 /*
3725 * The content of the entity definition is copied in a buffer.
3726 */
3727
3728 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729 input = ctxt->input;
3730 GROW;
3731 if (ctxt->instate == XML_PARSER_EOF)
3732 goto error;
3733 NEXT;
3734 c = CUR_CHAR(l);
3735 /*
3736 * NOTE: 4.4.5 Included in Literal
3737 * When a parameter entity reference appears in a literal entity
3738 * value, ... a single or double quote character in the replacement
3739 * text is always treated as a normal data character and will not
3740 * terminate the literal.
3741 * In practice it means we stop the loop only when back at parsing
3742 * the initial entity and the quote is found
3743 */
3744 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3746 if (len + 5 >= size) {
3747 xmlChar *tmp;
3748
3749 size *= 2;
3750 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 if (tmp == NULL) {
3752 xmlErrMemory(ctxt, NULL);
3753 goto error;
3754 }
3755 buf = tmp;
3756 }
3757 COPY_BUF(l,buf,len,c);
3758 NEXTL(l);
3759
3760 GROW;
3761 c = CUR_CHAR(l);
3762 if (c == 0) {
3763 GROW;
3764 c = CUR_CHAR(l);
3765 }
3766 }
3767 buf[len] = 0;
3768 if (ctxt->instate == XML_PARSER_EOF)
3769 goto error;
3770 if (c != stop) {
3771 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772 goto error;
3773 }
3774 NEXT;
3775
3776 /*
3777 * Raise problem w.r.t. '&' and '%' being used in non-entities
3778 * reference constructs. Note Charref will be handled in
3779 * xmlStringDecodeEntities()
3780 */
3781 cur = buf;
3782 while (*cur != 0) { /* non input consuming */
3783 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 xmlChar *name;
3785 xmlChar tmp = *cur;
3786 int nameOk = 0;
3787
3788 cur++;
3789 name = xmlParseStringName(ctxt, &cur);
3790 if (name != NULL) {
3791 nameOk = 1;
3792 xmlFree(name);
3793 }
3794 if ((nameOk == 0) || (*cur != ';')) {
3795 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3796 "EntityValue: '%c' forbidden except for entities references\n",
3797 tmp);
3798 goto error;
3799 }
3800 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 (ctxt->inputNr == 1)) {
3802 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3803 goto error;
3804 }
3805 if (*cur == 0)
3806 break;
3807 }
3808 cur++;
3809 }
3810
3811 /*
3812 * Then PEReference entities are substituted.
3813 *
3814 * NOTE: 4.4.7 Bypassed
3815 * When a general entity reference appears in the EntityValue in
3816 * an entity declaration, it is bypassed and left as is.
3817 * so XML_SUBSTITUTE_REF is not set here.
3818 */
3819 ++ctxt->depth;
3820 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821 0, 0, 0);
3822 --ctxt->depth;
3823 if (orig != NULL) {
3824 *orig = buf;
3825 buf = NULL;
3826 }
3827
3828 error:
3829 if (buf != NULL)
3830 xmlFree(buf);
3831 return(ret);
3832 }
3833
3834 /**
3835 * xmlParseAttValueComplex:
3836 * @ctxt: an XML parser context
3837 * @len: the resulting attribute len
3838 * @normalize: wether to apply the inner normalization
3839 *
3840 * parse a value for an attribute, this is the fallback function
3841 * of xmlParseAttValue() when the attribute parsing requires handling
3842 * of non-ASCII characters, or normalization compaction.
3843 *
3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845 */
3846 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3847 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3848 xmlChar limit = 0;
3849 xmlChar *buf = NULL;
3850 xmlChar *rep = NULL;
3851 size_t len = 0;
3852 size_t buf_size = 0;
3853 int c, l, in_space = 0;
3854 xmlChar *current = NULL;
3855 xmlEntityPtr ent;
3856
3857 if (NXT(0) == '"') {
3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 limit = '"';
3860 NEXT;
3861 } else if (NXT(0) == '\'') {
3862 limit = '\'';
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 NEXT;
3865 } else {
3866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3867 return(NULL);
3868 }
3869
3870 /*
3871 * allocate a translation buffer.
3872 */
3873 buf_size = XML_PARSER_BUFFER_SIZE;
3874 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3875 if (buf == NULL) goto mem_error;
3876
3877 /*
3878 * OK loop until we reach one of the ending char or a size limit.
3879 */
3880 c = CUR_CHAR(l);
3881 while (((NXT(0) != limit) && /* checked */
3882 (IS_CHAR(c)) && (c != '<')) &&
3883 (ctxt->instate != XML_PARSER_EOF)) {
3884 /*
3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886 * special option is given
3887 */
3888 if ((len > XML_MAX_TEXT_LENGTH) &&
3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3891 "AttValue length too long\n");
3892 goto mem_error;
3893 }
3894 if (c == 0) break;
3895 if (c == '&') {
3896 in_space = 0;
3897 if (NXT(1) == '#') {
3898 int val = xmlParseCharRef(ctxt);
3899
3900 if (val == '&') {
3901 if (ctxt->replaceEntities) {
3902 if (len + 10 > buf_size) {
3903 growBuffer(buf, 10);
3904 }
3905 buf[len++] = '&';
3906 } else {
3907 /*
3908 * The reparsing will be done in xmlStringGetNodeList()
3909 * called by the attribute() function in SAX.c
3910 */
3911 if (len + 10 > buf_size) {
3912 growBuffer(buf, 10);
3913 }
3914 buf[len++] = '&';
3915 buf[len++] = '#';
3916 buf[len++] = '3';
3917 buf[len++] = '8';
3918 buf[len++] = ';';
3919 }
3920 } else if (val != 0) {
3921 if (len + 10 > buf_size) {
3922 growBuffer(buf, 10);
3923 }
3924 len += xmlCopyChar(0, &buf[len], val);
3925 }
3926 } else {
3927 ent = xmlParseEntityRef(ctxt);
3928 ctxt->nbentities++;
3929 if (ent != NULL)
3930 ctxt->nbentities += ent->owner;
3931 if ((ent != NULL) &&
3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3933 if (len + 10 > buf_size) {
3934 growBuffer(buf, 10);
3935 }
3936 if ((ctxt->replaceEntities == 0) &&
3937 (ent->content[0] == '&')) {
3938 buf[len++] = '&';
3939 buf[len++] = '#';
3940 buf[len++] = '3';
3941 buf[len++] = '8';
3942 buf[len++] = ';';
3943 } else {
3944 buf[len++] = ent->content[0];
3945 }
3946 } else if ((ent != NULL) &&
3947 (ctxt->replaceEntities != 0)) {
3948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3949 ++ctxt->depth;
3950 rep = xmlStringDecodeEntities(ctxt, ent->content,
3951 XML_SUBSTITUTE_REF,
3952 0, 0, 0);
3953 --ctxt->depth;
3954 if (rep != NULL) {
3955 current = rep;
3956 while (*current != 0) { /* non input consuming */
3957 if ((*current == 0xD) || (*current == 0xA) ||
3958 (*current == 0x9)) {
3959 buf[len++] = 0x20;
3960 current++;
3961 } else
3962 buf[len++] = *current++;
3963 if (len + 10 > buf_size) {
3964 growBuffer(buf, 10);
3965 }
3966 }
3967 xmlFree(rep);
3968 rep = NULL;
3969 }
3970 } else {
3971 if (len + 10 > buf_size) {
3972 growBuffer(buf, 10);
3973 }
3974 if (ent->content != NULL)
3975 buf[len++] = ent->content[0];
3976 }
3977 } else if (ent != NULL) {
3978 int i = xmlStrlen(ent->name);
3979 const xmlChar *cur = ent->name;
3980
3981 /*
3982 * This may look absurd but is needed to detect
3983 * entities problems
3984 */
3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3986 (ent->content != NULL) && (ent->checked == 0)) {
3987 unsigned long oldnbent = ctxt->nbentities;
3988
3989 ++ctxt->depth;
3990 rep = xmlStringDecodeEntities(ctxt, ent->content,
3991 XML_SUBSTITUTE_REF, 0, 0, 0);
3992 --ctxt->depth;
3993
3994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
3995 if (rep != NULL) {
3996 if (xmlStrchr(rep, '<'))
3997 ent->checked |= 1;
3998 xmlFree(rep);
3999 rep = NULL;
4000 } else {
4001 ent->content[0] = 0;
4002 }
4003 }
4004
4005 /*
4006 * Just output the reference
4007 */
4008 buf[len++] = '&';
4009 while (len + i + 10 > buf_size) {
4010 growBuffer(buf, i + 10);
4011 }
4012 for (;i > 0;i--)
4013 buf[len++] = *cur++;
4014 buf[len++] = ';';
4015 }
4016 }
4017 } else {
4018 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4019 if ((len != 0) || (!normalize)) {
4020 if ((!normalize) || (!in_space)) {
4021 COPY_BUF(l,buf,len,0x20);
4022 while (len + 10 > buf_size) {
4023 growBuffer(buf, 10);
4024 }
4025 }
4026 in_space = 1;
4027 }
4028 } else {
4029 in_space = 0;
4030 COPY_BUF(l,buf,len,c);
4031 if (len + 10 > buf_size) {
4032 growBuffer(buf, 10);
4033 }
4034 }
4035 NEXTL(l);
4036 }
4037 GROW;
4038 c = CUR_CHAR(l);
4039 }
4040 if (ctxt->instate == XML_PARSER_EOF)
4041 goto error;
4042
4043 if ((in_space) && (normalize)) {
4044 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4045 }
4046 buf[len] = 0;
4047 if (RAW == '<') {
4048 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4049 } else if (RAW != limit) {
4050 if ((c != 0) && (!IS_CHAR(c))) {
4051 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052 "invalid character in attribute value\n");
4053 } else {
4054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055 "AttValue: ' expected\n");
4056 }
4057 } else
4058 NEXT;
4059
4060 /*
4061 * There we potentially risk an overflow, don't allow attribute value of
4062 * length more than INT_MAX it is a very reasonnable assumption !
4063 */
4064 if (len >= INT_MAX) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4066 "AttValue length too long\n");
4067 goto mem_error;
4068 }
4069
4070 if (attlen != NULL) *attlen = (int) len;
4071 return(buf);
4072
4073 mem_error:
4074 xmlErrMemory(ctxt, NULL);
4075 error:
4076 if (buf != NULL)
4077 xmlFree(buf);
4078 if (rep != NULL)
4079 xmlFree(rep);
4080 return(NULL);
4081 }
4082
4083 /**
4084 * xmlParseAttValue:
4085 * @ctxt: an XML parser context
4086 *
4087 * parse a value for an attribute
4088 * Note: the parser won't do substitution of entities here, this
4089 * will be handled later in xmlStringGetNodeList
4090 *
4091 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092 * "'" ([^<&'] | Reference)* "'"
4093 *
4094 * 3.3.3 Attribute-Value Normalization:
4095 * Before the value of an attribute is passed to the application or
4096 * checked for validity, the XML processor must normalize it as follows:
4097 * - a character reference is processed by appending the referenced
4098 * character to the attribute value
4099 * - an entity reference is processed by recursively processing the
4100 * replacement text of the entity
4101 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102 * appending #x20 to the normalized value, except that only a single
4103 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4104 * parsed entity or the literal entity value of an internal parsed entity
4105 * - other characters are processed by appending them to the normalized value
4106 * If the declared value is not CDATA, then the XML processor must further
4107 * process the normalized attribute value by discarding any leading and
4108 * trailing space (#x20) characters, and by replacing sequences of space
4109 * (#x20) characters by a single space (#x20) character.
4110 * All attributes for which no declaration has been read should be treated
4111 * by a non-validating parser as if declared CDATA.
4112 *
4113 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114 */
4115
4116
4117 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4118 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4119 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4120 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4121 }
4122
4123 /**
4124 * xmlParseSystemLiteral:
4125 * @ctxt: an XML parser context
4126 *
4127 * parse an XML Literal
4128 *
4129 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130 *
4131 * Returns the SystemLiteral parsed or NULL
4132 */
4133
4134 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4135 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136 xmlChar *buf = NULL;
4137 int len = 0;
4138 int size = XML_PARSER_BUFFER_SIZE;
4139 int cur, l;
4140 xmlChar stop;
4141 int state = ctxt->instate;
4142 int count = 0;
4143
4144 SHRINK;
4145 if (RAW == '"') {
4146 NEXT;
4147 stop = '"';
4148 } else if (RAW == '\'') {
4149 NEXT;
4150 stop = '\'';
4151 } else {
4152 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4153 return(NULL);
4154 }
4155
4156 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4157 if (buf == NULL) {
4158 xmlErrMemory(ctxt, NULL);
4159 return(NULL);
4160 }
4161 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162 cur = CUR_CHAR(l);
4163 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4164 if (len + 5 >= size) {
4165 xmlChar *tmp;
4166
4167 if ((size > XML_MAX_NAME_LENGTH) &&
4168 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170 xmlFree(buf);
4171 ctxt->instate = (xmlParserInputState) state;
4172 return(NULL);
4173 }
4174 size *= 2;
4175 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 if (tmp == NULL) {
4177 xmlFree(buf);
4178 xmlErrMemory(ctxt, NULL);
4179 ctxt->instate = (xmlParserInputState) state;
4180 return(NULL);
4181 }
4182 buf = tmp;
4183 }
4184 count++;
4185 if (count > 50) {
4186 GROW;
4187 count = 0;
4188 if (ctxt->instate == XML_PARSER_EOF) {
4189 xmlFree(buf);
4190 return(NULL);
4191 }
4192 }
4193 COPY_BUF(l,buf,len,cur);
4194 NEXTL(l);
4195 cur = CUR_CHAR(l);
4196 if (cur == 0) {
4197 GROW;
4198 SHRINK;
4199 cur = CUR_CHAR(l);
4200 }
4201 }
4202 buf[len] = 0;
4203 ctxt->instate = (xmlParserInputState) state;
4204 if (!IS_CHAR(cur)) {
4205 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4206 } else {
4207 NEXT;
4208 }
4209 return(buf);
4210 }
4211
4212 /**
4213 * xmlParsePubidLiteral:
4214 * @ctxt: an XML parser context
4215 *
4216 * parse an XML public literal
4217 *
4218 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219 *
4220 * Returns the PubidLiteral parsed or NULL.
4221 */
4222
4223 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4224 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225 xmlChar *buf = NULL;
4226 int len = 0;
4227 int size = XML_PARSER_BUFFER_SIZE;
4228 xmlChar cur;
4229 xmlChar stop;
4230 int count = 0;
4231 xmlParserInputState oldstate = ctxt->instate;
4232
4233 SHRINK;
4234 if (RAW == '"') {
4235 NEXT;
4236 stop = '"';
4237 } else if (RAW == '\'') {
4238 NEXT;
4239 stop = '\'';
4240 } else {
4241 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4242 return(NULL);
4243 }
4244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245 if (buf == NULL) {
4246 xmlErrMemory(ctxt, NULL);
4247 return(NULL);
4248 }
4249 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4250 cur = CUR;
4251 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4252 if (len + 1 >= size) {
4253 xmlChar *tmp;
4254
4255 if ((size > XML_MAX_NAME_LENGTH) &&
4256 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258 xmlFree(buf);
4259 return(NULL);
4260 }
4261 size *= 2;
4262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 if (tmp == NULL) {
4264 xmlErrMemory(ctxt, NULL);
4265 xmlFree(buf);
4266 return(NULL);
4267 }
4268 buf = tmp;
4269 }
4270 buf[len++] = cur;
4271 count++;
4272 if (count > 50) {
4273 GROW;
4274 count = 0;
4275 if (ctxt->instate == XML_PARSER_EOF) {
4276 xmlFree(buf);
4277 return(NULL);
4278 }
4279 }
4280 NEXT;
4281 cur = CUR;
4282 if (cur == 0) {
4283 GROW;
4284 SHRINK;
4285 cur = CUR;
4286 }
4287 }
4288 buf[len] = 0;
4289 if (cur != stop) {
4290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4291 } else {
4292 NEXT;
4293 }
4294 ctxt->instate = oldstate;
4295 return(buf);
4296 }
4297
4298 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4299
/*
 * Lookup table for the inner loop of the character data scanner, indexed
 * by byte value. An entry is non-zero (the byte itself) when the byte can
 * be skipped as plain ASCII text. It is zero for control characters other
 * than TAB (so LF and CR are zero), for '&', '<' and ']', and for every
 * byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB (0x09) passes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never skipped here */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337
4338 /**
4339 * xmlParseCharData:
4340 * @ctxt: an XML parser context
4341 * @cdata: int indicating whether we are within a CDATA section
4342 *
4343 * parse a CharData section.
4344 * if we are within a CDATA section ']]>' marks an end of section.
4345 *
4346 * The right angle bracket (>) may be represented using the string "&gt;",
4347 * and must, for compatibility, be escaped using "&gt;" or a character
4348 * reference when it appears in the string "]]>" in content, when that
4349 * string is not marking the end of a CDATA section.
4350 *
4351 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4352 */
4353
4354 void
xmlParseCharData(xmlParserCtxtPtr ctxt,int cdata)4355 xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4356 const xmlChar *in;
4357 int nbchar = 0;
4358 int line = ctxt->input->line;
4359 int col = ctxt->input->col;
4360 int ccol;
4361
4362 SHRINK;
4363 GROW;
4364 /*
4365 * Accelerated common case where input don't need to be
4366 * modified before passing it to the handler.
4367 */
4368 if (!cdata) {
4369 in = ctxt->input->cur;
4370 do {
4371 get_more_space:
4372 while (*in == 0x20) { in++; ctxt->input->col++; }
4373 if (*in == 0xA) {
4374 do {
4375 ctxt->input->line++; ctxt->input->col = 1;
4376 in++;
4377 } while (*in == 0xA);
4378 goto get_more_space;
4379 }
4380 if (*in == '<') {
4381 nbchar = in - ctxt->input->cur;
4382 if (nbchar > 0) {
4383 const xmlChar *tmp = ctxt->input->cur;
4384 ctxt->input->cur = in;
4385
4386 if ((ctxt->sax != NULL) &&
4387 (ctxt->sax->ignorableWhitespace !=
4388 ctxt->sax->characters)) {
4389 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4390 if (ctxt->sax->ignorableWhitespace != NULL)
4391 ctxt->sax->ignorableWhitespace(ctxt->userData,
4392 tmp, nbchar);
4393 } else {
4394 if (ctxt->sax->characters != NULL)
4395 ctxt->sax->characters(ctxt->userData,
4396 tmp, nbchar);
4397 if (*ctxt->space == -1)
4398 *ctxt->space = -2;
4399 }
4400 } else if ((ctxt->sax != NULL) &&
4401 (ctxt->sax->characters != NULL)) {
4402 ctxt->sax->characters(ctxt->userData,
4403 tmp, nbchar);
4404 }
4405 }
4406 return;
4407 }
4408
4409 get_more:
4410 ccol = ctxt->input->col;
4411 while (test_char_data[*in]) {
4412 in++;
4413 ccol++;
4414 }
4415 ctxt->input->col = ccol;
4416 if (*in == 0xA) {
4417 do {
4418 ctxt->input->line++; ctxt->input->col = 1;
4419 in++;
4420 } while (*in == 0xA);
4421 goto get_more;
4422 }
4423 if (*in == ']') {
4424 if ((in[1] == ']') && (in[2] == '>')) {
4425 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4426 ctxt->input->cur = in + 1;
4427 return;
4428 }
4429 in++;
4430 ctxt->input->col++;
4431 goto get_more;
4432 }
4433 nbchar = in - ctxt->input->cur;
4434 if (nbchar > 0) {
4435 if ((ctxt->sax != NULL) &&
4436 (ctxt->sax->ignorableWhitespace !=
4437 ctxt->sax->characters) &&
4438 (IS_BLANK_CH(*ctxt->input->cur))) {
4439 const xmlChar *tmp = ctxt->input->cur;
4440 ctxt->input->cur = in;
4441
4442 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4443 if (ctxt->sax->ignorableWhitespace != NULL)
4444 ctxt->sax->ignorableWhitespace(ctxt->userData,
4445 tmp, nbchar);
4446 } else {
4447 if (ctxt->sax->characters != NULL)
4448 ctxt->sax->characters(ctxt->userData,
4449 tmp, nbchar);
4450 if (*ctxt->space == -1)
4451 *ctxt->space = -2;
4452 }
4453 line = ctxt->input->line;
4454 col = ctxt->input->col;
4455 } else if (ctxt->sax != NULL) {
4456 if (ctxt->sax->characters != NULL)
4457 ctxt->sax->characters(ctxt->userData,
4458 ctxt->input->cur, nbchar);
4459 line = ctxt->input->line;
4460 col = ctxt->input->col;
4461 }
4462 /* something really bad happened in the SAX callback */
4463 if (ctxt->instate != XML_PARSER_CONTENT)
4464 return;
4465 }
4466 ctxt->input->cur = in;
4467 if (*in == 0xD) {
4468 in++;
4469 if (*in == 0xA) {
4470 ctxt->input->cur = in;
4471 in++;
4472 ctxt->input->line++; ctxt->input->col = 1;
4473 continue; /* while */
4474 }
4475 in--;
4476 }
4477 if (*in == '<') {
4478 return;
4479 }
4480 if (*in == '&') {
4481 return;
4482 }
4483 SHRINK;
4484 GROW;
4485 if (ctxt->instate == XML_PARSER_EOF)
4486 return;
4487 in = ctxt->input->cur;
4488 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4489 nbchar = 0;
4490 }
4491 ctxt->input->line = line;
4492 ctxt->input->col = col;
4493 xmlParseCharDataComplex(ctxt, cdata);
4494 }
4495
4496 /**
4497 * xmlParseCharDataComplex:
4498 * @ctxt: an XML parser context
4499 * @cdata: int indicating whether we are within a CDATA section
4500 *
4501 * parse a CharData section.this is the fallback function
4502 * of xmlParseCharData() when the parsing requires handling
4503 * of non-ASCII characters.
4504 */
4505 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4506 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4507 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508 int nbchar = 0;
4509 int cur, l;
4510 int count = 0;
4511
4512 SHRINK;
4513 GROW;
4514 cur = CUR_CHAR(l);
4515 while ((cur != '<') && /* checked */
4516 (cur != '&') &&
4517 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4518 if ((cur == ']') && (NXT(1) == ']') &&
4519 (NXT(2) == '>')) {
4520 if (cdata) break;
4521 else {
4522 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4523 }
4524 }
4525 COPY_BUF(l,buf,nbchar,cur);
4526 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4527 buf[nbchar] = 0;
4528
4529 /*
4530 * OK the segment is to be consumed as chars.
4531 */
4532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4533 if (areBlanks(ctxt, buf, nbchar, 0)) {
4534 if (ctxt->sax->ignorableWhitespace != NULL)
4535 ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 buf, nbchar);
4537 } else {
4538 if (ctxt->sax->characters != NULL)
4539 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4540 if ((ctxt->sax->characters !=
4541 ctxt->sax->ignorableWhitespace) &&
4542 (*ctxt->space == -1))
4543 *ctxt->space = -2;
4544 }
4545 }
4546 nbchar = 0;
4547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
4550 }
4551 count++;
4552 if (count > 50) {
4553 GROW;
4554 count = 0;
4555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
4557 }
4558 NEXTL(l);
4559 cur = CUR_CHAR(l);
4560 }
4561 if (nbchar != 0) {
4562 buf[nbchar] = 0;
4563 /*
4564 * OK the segment is to be consumed as chars.
4565 */
4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4567 if (areBlanks(ctxt, buf, nbchar, 0)) {
4568 if (ctxt->sax->ignorableWhitespace != NULL)
4569 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 } else {
4571 if (ctxt->sax->characters != NULL)
4572 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4573 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 (*ctxt->space == -1))
4575 *ctxt->space = -2;
4576 }
4577 }
4578 }
4579 if ((cur != 0) && (!IS_CHAR(cur))) {
4580 /* Generate the error and skip the offending character */
4581 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582 "PCDATA invalid Char value %d\n",
4583 cur);
4584 NEXTL(l);
4585 }
4586 }
4587
4588 /**
4589 * xmlParseExternalID:
4590 * @ctxt: an XML parser context
4591 * @publicID: a xmlChar** receiving PubidLiteral
4592 * @strict: indicate whether we should restrict parsing to only
4593 * production [75], see NOTE below
4594 *
4595 * Parse an External ID or a Public ID
4596 *
4597 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4598 * 'PUBLIC' S PubidLiteral S SystemLiteral
4599 *
4600 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4602 *
4603 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604 *
4605 * Returns the function returns SystemLiteral and in the second
4606 * case publicID receives PubidLiteral, is strict is off
4607 * it is possible to return NULL and have publicID set.
4608 */
4609
4610 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4611 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612 xmlChar *URI = NULL;
4613
4614 SHRINK;
4615
4616 *publicID = NULL;
4617 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4618 SKIP(6);
4619 if (SKIP_BLANKS == 0) {
4620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 "Space required after 'SYSTEM'\n");
4622 }
4623 URI = xmlParseSystemLiteral(ctxt);
4624 if (URI == NULL) {
4625 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4626 }
4627 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4628 SKIP(6);
4629 if (SKIP_BLANKS == 0) {
4630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 "Space required after 'PUBLIC'\n");
4632 }
4633 *publicID = xmlParsePubidLiteral(ctxt);
4634 if (*publicID == NULL) {
4635 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4636 }
4637 if (strict) {
4638 /*
4639 * We don't handle [83] so "S SystemLiteral" is required.
4640 */
4641 if (SKIP_BLANKS == 0) {
4642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643 "Space required after the Public Identifier\n");
4644 }
4645 } else {
4646 /*
4647 * We handle [83] so we return immediately, if
4648 * "S SystemLiteral" is not detected. We skip blanks if no
4649 * system literal was found, but this is harmless since we must
4650 * be at the end of a NotationDecl.
4651 */
4652 if (SKIP_BLANKS == 0) return(NULL);
4653 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4654 }
4655 URI = xmlParseSystemLiteral(ctxt);
4656 if (URI == NULL) {
4657 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4658 }
4659 }
4660 return(URI);
4661 }
4662
4663 /**
4664 * xmlParseCommentComplex:
4665 * @ctxt: an XML parser context
4666 * @buf: the already parsed part of the buffer
4667 * @len: number of bytes filles in the buffer
4668 * @size: allocated size of the buffer
4669 *
4670 * Skip an XML (SGML) comment <!-- .... -->
4671 * The spec says that "For compatibility, the string "--" (double-hyphen)
4672 * must not occur within comments. "
4673 * This is the slow routine in case the accelerator for ascii didn't work
4674 *
4675 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676 */
4677 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4678 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679 size_t len, size_t size) {
4680 int q, ql;
4681 int r, rl;
4682 int cur, l;
4683 size_t count = 0;
4684 int inputid;
4685
4686 inputid = ctxt->input->id;
4687
4688 if (buf == NULL) {
4689 len = 0;
4690 size = XML_PARSER_BUFFER_SIZE;
4691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 if (buf == NULL) {
4693 xmlErrMemory(ctxt, NULL);
4694 return;
4695 }
4696 }
4697 GROW; /* Assure there's enough input data */
4698 q = CUR_CHAR(ql);
4699 if (q == 0)
4700 goto not_terminated;
4701 if (!IS_CHAR(q)) {
4702 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703 "xmlParseComment: invalid xmlChar value %d\n",
4704 q);
4705 xmlFree (buf);
4706 return;
4707 }
4708 NEXTL(ql);
4709 r = CUR_CHAR(rl);
4710 if (r == 0)
4711 goto not_terminated;
4712 if (!IS_CHAR(r)) {
4713 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714 "xmlParseComment: invalid xmlChar value %d\n",
4715 q);
4716 xmlFree (buf);
4717 return;
4718 }
4719 NEXTL(rl);
4720 cur = CUR_CHAR(l);
4721 if (cur == 0)
4722 goto not_terminated;
4723 while (IS_CHAR(cur) && /* checked */
4724 ((cur != '>') ||
4725 (r != '-') || (q != '-'))) {
4726 if ((r == '-') && (q == '-')) {
4727 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4728 }
4729 if ((len > XML_MAX_TEXT_LENGTH) &&
4730 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732 "Comment too big found", NULL);
4733 xmlFree (buf);
4734 return;
4735 }
4736 if (len + 5 >= size) {
4737 xmlChar *new_buf;
4738 size_t new_size;
4739
4740 new_size = size * 2;
4741 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4742 if (new_buf == NULL) {
4743 xmlFree (buf);
4744 xmlErrMemory(ctxt, NULL);
4745 return;
4746 }
4747 buf = new_buf;
4748 size = new_size;
4749 }
4750 COPY_BUF(ql,buf,len,q);
4751 q = r;
4752 ql = rl;
4753 r = cur;
4754 rl = l;
4755
4756 count++;
4757 if (count > 50) {
4758 GROW;
4759 count = 0;
4760 if (ctxt->instate == XML_PARSER_EOF) {
4761 xmlFree(buf);
4762 return;
4763 }
4764 }
4765 NEXTL(l);
4766 cur = CUR_CHAR(l);
4767 if (cur == 0) {
4768 SHRINK;
4769 GROW;
4770 cur = CUR_CHAR(l);
4771 }
4772 }
4773 buf[len] = 0;
4774 if (cur == 0) {
4775 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4776 "Comment not terminated \n<!--%.50s\n", buf);
4777 } else if (!IS_CHAR(cur)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 cur);
4781 } else {
4782 if (inputid != ctxt->input->id) {
4783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4784 "Comment doesn't start and stop in the same"
4785 " entity\n");
4786 }
4787 NEXT;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 (!ctxt->disableSAX))
4790 ctxt->sax->comment(ctxt->userData, buf);
4791 }
4792 xmlFree(buf);
4793 return;
4794 not_terminated:
4795 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 "Comment not terminated\n", NULL);
4797 xmlFree(buf);
4798 return;
4799 }
4800
4801 /**
4802 * xmlParseComment:
4803 * @ctxt: an XML parser context
4804 *
4805 * Skip an XML (SGML) comment <!-- .... -->
4806 * The spec says that "For compatibility, the string "--" (double-hyphen)
4807 * must not occur within comments. "
4808 *
4809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4810 */
4811 void
xmlParseComment(xmlParserCtxtPtr ctxt)4812 xmlParseComment(xmlParserCtxtPtr ctxt) {
4813 xmlChar *buf = NULL;
4814 size_t size = XML_PARSER_BUFFER_SIZE;
4815 size_t len = 0;
4816 xmlParserInputState state;
4817 const xmlChar *in;
4818 size_t nbchar = 0;
4819 int ccol;
4820 int inputid;
4821
4822 /*
4823 * Check that there is a comment right here.
4824 */
4825 if ((RAW != '<') || (NXT(1) != '!') ||
4826 (NXT(2) != '-') || (NXT(3) != '-')) return;
4827 state = ctxt->instate;
4828 ctxt->instate = XML_PARSER_COMMENT;
4829 inputid = ctxt->input->id;
4830 SKIP(4);
4831 SHRINK;
4832 GROW;
4833
4834 /*
4835 * Accelerated common case where input don't need to be
4836 * modified before passing it to the handler.
4837 */
4838 in = ctxt->input->cur;
4839 do {
4840 if (*in == 0xA) {
4841 do {
4842 ctxt->input->line++; ctxt->input->col = 1;
4843 in++;
4844 } while (*in == 0xA);
4845 }
4846 get_more:
4847 ccol = ctxt->input->col;
4848 while (((*in > '-') && (*in <= 0x7F)) ||
4849 ((*in >= 0x20) && (*in < '-')) ||
4850 (*in == 0x09)) {
4851 in++;
4852 ccol++;
4853 }
4854 ctxt->input->col = ccol;
4855 if (*in == 0xA) {
4856 do {
4857 ctxt->input->line++; ctxt->input->col = 1;
4858 in++;
4859 } while (*in == 0xA);
4860 goto get_more;
4861 }
4862 nbchar = in - ctxt->input->cur;
4863 /*
4864 * save current set of data
4865 */
4866 if (nbchar > 0) {
4867 if ((ctxt->sax != NULL) &&
4868 (ctxt->sax->comment != NULL)) {
4869 if (buf == NULL) {
4870 if ((*in == '-') && (in[1] == '-'))
4871 size = nbchar + 1;
4872 else
4873 size = XML_PARSER_BUFFER_SIZE + nbchar;
4874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875 if (buf == NULL) {
4876 xmlErrMemory(ctxt, NULL);
4877 ctxt->instate = state;
4878 return;
4879 }
4880 len = 0;
4881 } else if (len + nbchar + 1 >= size) {
4882 xmlChar *new_buf;
4883 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884 new_buf = (xmlChar *) xmlRealloc(buf,
4885 size * sizeof(xmlChar));
4886 if (new_buf == NULL) {
4887 xmlFree (buf);
4888 xmlErrMemory(ctxt, NULL);
4889 ctxt->instate = state;
4890 return;
4891 }
4892 buf = new_buf;
4893 }
4894 memcpy(&buf[len], ctxt->input->cur, nbchar);
4895 len += nbchar;
4896 buf[len] = 0;
4897 }
4898 }
4899 if ((len > XML_MAX_TEXT_LENGTH) &&
4900 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902 "Comment too big found", NULL);
4903 xmlFree (buf);
4904 return;
4905 }
4906 ctxt->input->cur = in;
4907 if (*in == 0xA) {
4908 in++;
4909 ctxt->input->line++; ctxt->input->col = 1;
4910 }
4911 if (*in == 0xD) {
4912 in++;
4913 if (*in == 0xA) {
4914 ctxt->input->cur = in;
4915 in++;
4916 ctxt->input->line++; ctxt->input->col = 1;
4917 continue; /* while */
4918 }
4919 in--;
4920 }
4921 SHRINK;
4922 GROW;
4923 if (ctxt->instate == XML_PARSER_EOF) {
4924 xmlFree(buf);
4925 return;
4926 }
4927 in = ctxt->input->cur;
4928 if (*in == '-') {
4929 if (in[1] == '-') {
4930 if (in[2] == '>') {
4931 if (ctxt->input->id != inputid) {
4932 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4933 "comment doesn't start and stop in the"
4934 " same entity\n");
4935 }
4936 SKIP(3);
4937 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938 (!ctxt->disableSAX)) {
4939 if (buf != NULL)
4940 ctxt->sax->comment(ctxt->userData, buf);
4941 else
4942 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943 }
4944 if (buf != NULL)
4945 xmlFree(buf);
4946 if (ctxt->instate != XML_PARSER_EOF)
4947 ctxt->instate = state;
4948 return;
4949 }
4950 if (buf != NULL) {
4951 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952 "Double hyphen within comment: "
4953 "<!--%.50s\n",
4954 buf);
4955 } else
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment\n", NULL);
4958 in++;
4959 ctxt->input->col++;
4960 }
4961 in++;
4962 ctxt->input->col++;
4963 goto get_more;
4964 }
4965 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4966 xmlParseCommentComplex(ctxt, buf, len, size);
4967 ctxt->instate = state;
4968 return;
4969 }
4970
4971
4972 /**
4973 * xmlParsePITarget:
4974 * @ctxt: an XML parser context
4975 *
4976 * parse the name of a PI
4977 *
4978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979 *
4980 * Returns the PITarget name or NULL
4981 */
4982
4983 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4984 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4985 const xmlChar *name;
4986
4987 name = xmlParseName(ctxt);
4988 if ((name != NULL) &&
4989 ((name[0] == 'x') || (name[0] == 'X')) &&
4990 ((name[1] == 'm') || (name[1] == 'M')) &&
4991 ((name[2] == 'l') || (name[2] == 'L'))) {
4992 int i;
4993 if ((name[0] == 'x') && (name[1] == 'm') &&
4994 (name[2] == 'l') && (name[3] == 0)) {
4995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4996 "XML declaration allowed only at the start of the document\n");
4997 return(name);
4998 } else if (name[3] == 0) {
4999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5000 return(name);
5001 }
5002 for (i = 0;;i++) {
5003 if (xmlW3CPIs[i] == NULL) break;
5004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 return(name);
5006 }
5007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 NULL, NULL);
5010 }
5011 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5012 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5013 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5014 }
5015 return(name);
5016 }
5017
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes), optionally surrounded by blanks.  Any other form produces an
 * XML_WAR_CATALOG_PI warning, except a "catalog" keyword which is not
 * followed by '=': that case is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p;
    const xmlChar *start;
    xmlChar quote;

    /* the "catalog" keyword */
    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;

    /* the '=' sign */
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }

    /* the opening quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the value, up to the matching quote */
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5079
5080 /**
5081 * xmlParsePI:
5082 * @ctxt: an XML parser context
5083 *
5084 * parse an XML Processing Instruction.
5085 *
5086 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087 *
5088 * The processing is transfered to SAX once parsed.
5089 */
5090
5091 void
xmlParsePI(xmlParserCtxtPtr ctxt)5092 xmlParsePI(xmlParserCtxtPtr ctxt) {
5093 xmlChar *buf = NULL;
5094 size_t len = 0;
5095 size_t size = XML_PARSER_BUFFER_SIZE;
5096 int cur, l;
5097 const xmlChar *target;
5098 xmlParserInputState state;
5099 int count = 0;
5100
5101 if ((RAW == '<') && (NXT(1) == '?')) {
5102 int inputid = ctxt->input->id;
5103 state = ctxt->instate;
5104 ctxt->instate = XML_PARSER_PI;
5105 /*
5106 * this is a Processing Instruction.
5107 */
5108 SKIP(2);
5109 SHRINK;
5110
5111 /*
5112 * Parse the target name and check for special support like
5113 * namespace.
5114 */
5115 target = xmlParsePITarget(ctxt);
5116 if (target != NULL) {
5117 if ((RAW == '?') && (NXT(1) == '>')) {
5118 if (inputid != ctxt->input->id) {
5119 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5120 "PI declaration doesn't start and stop in"
5121 " the same entity\n");
5122 }
5123 SKIP(2);
5124
5125 /*
5126 * SAX: PI detected.
5127 */
5128 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 (ctxt->sax->processingInstruction != NULL))
5130 ctxt->sax->processingInstruction(ctxt->userData,
5131 target, NULL);
5132 if (ctxt->instate != XML_PARSER_EOF)
5133 ctxt->instate = state;
5134 return;
5135 }
5136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5137 if (buf == NULL) {
5138 xmlErrMemory(ctxt, NULL);
5139 ctxt->instate = state;
5140 return;
5141 }
5142 if (SKIP_BLANKS == 0) {
5143 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "ParsePI: PI %s space expected\n", target);
5145 }
5146 cur = CUR_CHAR(l);
5147 while (IS_CHAR(cur) && /* checked */
5148 ((cur != '?') || (NXT(1) != '>'))) {
5149 if (len + 5 >= size) {
5150 xmlChar *tmp;
5151 size_t new_size = size * 2;
5152 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5153 if (tmp == NULL) {
5154 xmlErrMemory(ctxt, NULL);
5155 xmlFree(buf);
5156 ctxt->instate = state;
5157 return;
5158 }
5159 buf = tmp;
5160 size = new_size;
5161 }
5162 count++;
5163 if (count > 50) {
5164 GROW;
5165 if (ctxt->instate == XML_PARSER_EOF) {
5166 xmlFree(buf);
5167 return;
5168 }
5169 count = 0;
5170 if ((len > XML_MAX_TEXT_LENGTH) &&
5171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173 "PI %s too big found", target);
5174 xmlFree(buf);
5175 ctxt->instate = state;
5176 return;
5177 }
5178 }
5179 COPY_BUF(l,buf,len,cur);
5180 NEXTL(l);
5181 cur = CUR_CHAR(l);
5182 if (cur == 0) {
5183 SHRINK;
5184 GROW;
5185 cur = CUR_CHAR(l);
5186 }
5187 }
5188 if ((len > XML_MAX_TEXT_LENGTH) &&
5189 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191 "PI %s too big found", target);
5192 xmlFree(buf);
5193 ctxt->instate = state;
5194 return;
5195 }
5196 buf[len] = 0;
5197 if (cur != '?') {
5198 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 "ParsePI: PI %s never end ...\n", target);
5200 } else {
5201 if (inputid != ctxt->input->id) {
5202 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 "PI declaration doesn't start and stop in"
5204 " the same entity\n");
5205 }
5206 SKIP(2);
5207
5208 #ifdef LIBXML_CATALOG_ENABLED
5209 if (((state == XML_PARSER_MISC) ||
5210 (state == XML_PARSER_START)) &&
5211 (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 (allow == XML_CATA_ALLOW_ALL))
5215 xmlParseCatalogPI(ctxt, buf);
5216 }
5217 #endif
5218
5219
5220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, buf);
5227 }
5228 xmlFree(buf);
5229 } else {
5230 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5231 }
5232 if (ctxt->instate != XML_PARSER_EOF)
5233 ctxt->instate = state;
5234 }
5235 }
5236
5237 /**
5238 * xmlParseNotationDecl:
5239 * @ctxt: an XML parser context
5240 *
5241 * parse a notation declaration
5242 *
5243 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5244 *
5245 * Hence there is actually 3 choices:
5246 * 'PUBLIC' S PubidLiteral
5247 * 'PUBLIC' S PubidLiteral S SystemLiteral
5248 * and 'SYSTEM' S SystemLiteral
5249 *
5250 * See the NOTE on xmlParseExternalID().
5251 */
5252
5253 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5254 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5255 const xmlChar *name;
5256 xmlChar *Pubid;
5257 xmlChar *Systemid;
5258
5259 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5260 int inputid = ctxt->input->id;
5261 SHRINK;
5262 SKIP(10);
5263 if (SKIP_BLANKS == 0) {
5264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 "Space required after '<!NOTATION'\n");
5266 return;
5267 }
5268
5269 name = xmlParseName(ctxt);
5270 if (name == NULL) {
5271 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5272 return;
5273 }
5274 if (xmlStrchr(name, ':') != NULL) {
5275 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5276 "colons are forbidden from notation names '%s'\n",
5277 name, NULL, NULL);
5278 }
5279 if (SKIP_BLANKS == 0) {
5280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space required after the NOTATION name'\n");
5282 return;
5283 }
5284
5285 /*
5286 * Parse the IDs.
5287 */
5288 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 SKIP_BLANKS;
5290
5291 if (RAW == '>') {
5292 if (inputid != ctxt->input->id) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 "Notation declaration doesn't start and stop"
5295 " in the same entity\n");
5296 }
5297 NEXT;
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->notationDecl != NULL))
5300 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 } else {
5302 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5303 }
5304 if (Systemid != NULL) xmlFree(Systemid);
5305 if (Pubid != NULL) xmlFree(Pubid);
5306 }
5307 }
5308
5309 /**
5310 * xmlParseEntityDecl:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse <!ENTITY declarations
5314 *
5315 * [70] EntityDecl ::= GEDecl | PEDecl
5316 *
5317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318 *
5319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320 *
5321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322 *
5323 * [74] PEDef ::= EntityValue | ExternalID
5324 *
5325 * [76] NDataDecl ::= S 'NDATA' S Name
5326 *
5327 * [ VC: Notation Declared ]
5328 * The Name must match the declared name of a notation.
5329 */
5330
5331 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5332 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5333 const xmlChar *name = NULL;
5334 xmlChar *value = NULL;
5335 xmlChar *URI = NULL, *literal = NULL;
5336 const xmlChar *ndata = NULL;
5337 int isParameter = 0;
5338 xmlChar *orig = NULL;
5339
5340 /* GROW; done in the caller */
5341 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5342 int inputid = ctxt->input->id;
5343 SHRINK;
5344 SKIP(8);
5345 if (SKIP_BLANKS == 0) {
5346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 "Space required after '<!ENTITY'\n");
5348 }
5349
5350 if (RAW == '%') {
5351 NEXT;
5352 if (SKIP_BLANKS == 0) {
5353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5354 "Space required after '%%'\n");
5355 }
5356 isParameter = 1;
5357 }
5358
5359 name = xmlParseName(ctxt);
5360 if (name == NULL) {
5361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 "xmlParseEntityDecl: no name\n");
5363 return;
5364 }
5365 if (xmlStrchr(name, ':') != NULL) {
5366 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5367 "colons are forbidden from entities names '%s'\n",
5368 name, NULL, NULL);
5369 }
5370 if (SKIP_BLANKS == 0) {
5371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "Space required after the entity name\n");
5373 }
5374
5375 ctxt->instate = XML_PARSER_ENTITY_DECL;
5376 /*
5377 * handle the various case of definitions...
5378 */
5379 if (isParameter) {
5380 if ((RAW == '"') || (RAW == '\'')) {
5381 value = xmlParseEntityValue(ctxt, &orig);
5382 if (value) {
5383 if ((ctxt->sax != NULL) &&
5384 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 ctxt->sax->entityDecl(ctxt->userData, name,
5386 XML_INTERNAL_PARAMETER_ENTITY,
5387 NULL, NULL, value);
5388 }
5389 } else {
5390 URI = xmlParseExternalID(ctxt, &literal, 1);
5391 if ((URI == NULL) && (literal == NULL)) {
5392 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5393 }
5394 if (URI) {
5395 xmlURIPtr uri;
5396
5397 uri = xmlParseURI((const char *) URI);
5398 if (uri == NULL) {
5399 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 "Invalid URI: %s\n", URI);
5401 /*
5402 * This really ought to be a well formedness error
5403 * but the XML Core WG decided otherwise c.f. issue
5404 * E26 of the XML erratas.
5405 */
5406 } else {
5407 if (uri->fragment != NULL) {
5408 /*
5409 * Okay this is foolish to block those but not
5410 * invalid URIs.
5411 */
5412 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5413 } else {
5414 if ((ctxt->sax != NULL) &&
5415 (!ctxt->disableSAX) &&
5416 (ctxt->sax->entityDecl != NULL))
5417 ctxt->sax->entityDecl(ctxt->userData, name,
5418 XML_EXTERNAL_PARAMETER_ENTITY,
5419 literal, URI, NULL);
5420 }
5421 xmlFreeURI(uri);
5422 }
5423 }
5424 }
5425 } else {
5426 if ((RAW == '"') || (RAW == '\'')) {
5427 value = xmlParseEntityValue(ctxt, &orig);
5428 if ((ctxt->sax != NULL) &&
5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 ctxt->sax->entityDecl(ctxt->userData, name,
5431 XML_INTERNAL_GENERAL_ENTITY,
5432 NULL, NULL, value);
5433 /*
5434 * For expat compatibility in SAX mode.
5435 */
5436 if ((ctxt->myDoc == NULL) ||
5437 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 if (ctxt->myDoc == NULL) {
5439 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5440 if (ctxt->myDoc == NULL) {
5441 xmlErrMemory(ctxt, "New Doc failed");
5442 return;
5443 }
5444 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5445 }
5446 if (ctxt->myDoc->intSubset == NULL)
5447 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 BAD_CAST "fake", NULL, NULL);
5449
5450 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 NULL, NULL, value);
5452 }
5453 } else {
5454 URI = xmlParseExternalID(ctxt, &literal, 1);
5455 if ((URI == NULL) && (literal == NULL)) {
5456 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5457 }
5458 if (URI) {
5459 xmlURIPtr uri;
5460
5461 uri = xmlParseURI((const char *)URI);
5462 if (uri == NULL) {
5463 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 "Invalid URI: %s\n", URI);
5465 /*
5466 * This really ought to be a well formedness error
5467 * but the XML Core WG decided otherwise c.f. issue
5468 * E26 of the XML erratas.
5469 */
5470 } else {
5471 if (uri->fragment != NULL) {
5472 /*
5473 * Okay this is foolish to block those but not
5474 * invalid URIs.
5475 */
5476 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5477 }
5478 xmlFreeURI(uri);
5479 }
5480 }
5481 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 "Space required before 'NDATA'\n");
5484 }
5485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5486 SKIP(5);
5487 if (SKIP_BLANKS == 0) {
5488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 "Space required after 'NDATA'\n");
5490 }
5491 ndata = xmlParseName(ctxt);
5492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 (ctxt->sax->unparsedEntityDecl != NULL))
5494 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 literal, URI, ndata);
5496 } else {
5497 if ((ctxt->sax != NULL) &&
5498 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 ctxt->sax->entityDecl(ctxt->userData, name,
5500 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 literal, URI, NULL);
5502 /*
5503 * For expat compatibility in SAX mode.
5504 * assuming the entity repalcement was asked for
5505 */
5506 if ((ctxt->replaceEntities != 0) &&
5507 ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
5515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5516 }
5517
5518 if (ctxt->myDoc->intSubset == NULL)
5519 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 BAD_CAST "fake", NULL, NULL);
5521 xmlSAX2EntityDecl(ctxt, name,
5522 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 literal, URI, NULL);
5524 }
5525 }
5526 }
5527 }
5528 if (ctxt->instate == XML_PARSER_EOF)
5529 goto done;
5530 SKIP_BLANKS;
5531 if (RAW != '>') {
5532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5533 "xmlParseEntityDecl: entity %s not terminated\n", name);
5534 xmlHaltParser(ctxt);
5535 } else {
5536 if (inputid != ctxt->input->id) {
5537 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5538 "Entity declaration doesn't start and stop in"
5539 " the same entity\n");
5540 }
5541 NEXT;
5542 }
5543 if (orig != NULL) {
5544 /*
5545 * Ugly mechanism to save the raw entity value.
5546 */
5547 xmlEntityPtr cur = NULL;
5548
5549 if (isParameter) {
5550 if ((ctxt->sax != NULL) &&
5551 (ctxt->sax->getParameterEntity != NULL))
5552 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 } else {
5554 if ((ctxt->sax != NULL) &&
5555 (ctxt->sax->getEntity != NULL))
5556 cur = ctxt->sax->getEntity(ctxt->userData, name);
5557 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5558 cur = xmlSAX2GetEntity(ctxt, name);
5559 }
5560 }
5561 if ((cur != NULL) && (cur->orig == NULL)) {
5562 cur->orig = orig;
5563 orig = NULL;
5564 }
5565 }
5566
5567 done:
5568 if (value != NULL) xmlFree(value);
5569 if (URI != NULL) xmlFree(URI);
5570 if (literal != NULL) xmlFree(literal);
5571 if (orig != NULL) xmlFree(orig);
5572 }
5573 }
5574
5575 /**
5576 * xmlParseDefaultDecl:
5577 * @ctxt: an XML parser context
5578 * @value: Receive a possible fixed default value for the attribute
5579 *
5580 * Parse an attribute default declaration
5581 *
5582 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583 *
5584 * [ VC: Required Attribute ]
5585 * if the default declaration is the keyword #REQUIRED, then the
5586 * attribute must be specified for all elements of the type in the
5587 * attribute-list declaration.
5588 *
5589 * [ VC: Attribute Default Legal ]
5590 * The declared default value must meet the lexical constraints of
5591 * the declared attribute type c.f. xmlValidateAttributeDecl()
5592 *
5593 * [ VC: Fixed Attribute Default ]
5594 * if an attribute has a default value declared with the #FIXED
5595 * keyword, instances of that attribute must match the default value.
5596 *
5597 * [ WFC: No < in Attribute Values ]
5598 * handled in xmlParseAttValue()
5599 *
5600 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5601 * or XML_ATTRIBUTE_FIXED.
5602 */
5603
5604 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5605 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606 int val;
5607 xmlChar *ret;
5608
5609 *value = NULL;
5610 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5611 SKIP(9);
5612 return(XML_ATTRIBUTE_REQUIRED);
5613 }
5614 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5615 SKIP(8);
5616 return(XML_ATTRIBUTE_IMPLIED);
5617 }
5618 val = XML_ATTRIBUTE_NONE;
5619 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5620 SKIP(6);
5621 val = XML_ATTRIBUTE_FIXED;
5622 if (SKIP_BLANKS == 0) {
5623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 "Space required after '#FIXED'\n");
5625 }
5626 }
5627 ret = xmlParseAttValue(ctxt);
5628 ctxt->instate = XML_PARSER_DTD;
5629 if (ret == NULL) {
5630 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5631 "Attribute default value declaration error\n");
5632 } else
5633 *value = ret;
5634 return(val);
5635 }
5636
5637 /**
5638 * xmlParseNotationType:
5639 * @ctxt: an XML parser context
5640 *
5641 * parse an Notation attribute type.
5642 *
5643 * Note: the leading 'NOTATION' S part has already being parsed...
5644 *
5645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646 *
5647 * [ VC: Notation Attributes ]
5648 * Values of this type must match one of the notation names included
5649 * in the declaration; all notation names in the declaration must be declared.
5650 *
5651 * Returns: the notation attribute tree built while parsing
5652 */
5653
5654 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5655 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5656 const xmlChar *name;
5657 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5658
5659 if (RAW != '(') {
5660 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5661 return(NULL);
5662 }
5663 SHRINK;
5664 do {
5665 NEXT;
5666 SKIP_BLANKS;
5667 name = xmlParseName(ctxt);
5668 if (name == NULL) {
5669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 "Name expected in NOTATION declaration\n");
5671 xmlFreeEnumeration(ret);
5672 return(NULL);
5673 }
5674 tmp = ret;
5675 while (tmp != NULL) {
5676 if (xmlStrEqual(name, tmp->name)) {
5677 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 "standalone: attribute notation value token %s duplicated\n",
5679 name, NULL);
5680 if (!xmlDictOwns(ctxt->dict, name))
5681 xmlFree((xmlChar *) name);
5682 break;
5683 }
5684 tmp = tmp->next;
5685 }
5686 if (tmp == NULL) {
5687 cur = xmlCreateEnumeration(name);
5688 if (cur == NULL) {
5689 xmlFreeEnumeration(ret);
5690 return(NULL);
5691 }
5692 if (last == NULL) ret = last = cur;
5693 else {
5694 last->next = cur;
5695 last = cur;
5696 }
5697 }
5698 SKIP_BLANKS;
5699 } while (RAW == '|');
5700 if (RAW != ')') {
5701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5702 xmlFreeEnumeration(ret);
5703 return(NULL);
5704 }
5705 NEXT;
5706 return(ret);
5707 }
5708
5709 /**
5710 * xmlParseEnumerationType:
5711 * @ctxt: an XML parser context
5712 *
5713 * parse an Enumeration attribute type.
5714 *
5715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716 *
5717 * [ VC: Enumeration ]
5718 * Values of this type must match one of the Nmtoken tokens in
5719 * the declaration
5720 *
5721 * Returns: the enumeration attribute tree built while parsing
5722 */
5723
5724 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5725 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name;
5727 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5728
5729 if (RAW != '(') {
5730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5731 return(NULL);
5732 }
5733 SHRINK;
5734 do {
5735 NEXT;
5736 SKIP_BLANKS;
5737 name = xmlParseNmtoken(ctxt);
5738 if (name == NULL) {
5739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5740 return(ret);
5741 }
5742 tmp = ret;
5743 while (tmp != NULL) {
5744 if (xmlStrEqual(name, tmp->name)) {
5745 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 "standalone: attribute enumeration value token %s duplicated\n",
5747 name, NULL);
5748 if (!xmlDictOwns(ctxt->dict, name))
5749 xmlFree(name);
5750 break;
5751 }
5752 tmp = tmp->next;
5753 }
5754 if (tmp == NULL) {
5755 cur = xmlCreateEnumeration(name);
5756 if (!xmlDictOwns(ctxt->dict, name))
5757 xmlFree(name);
5758 if (cur == NULL) {
5759 xmlFreeEnumeration(ret);
5760 return(NULL);
5761 }
5762 if (last == NULL) ret = last = cur;
5763 else {
5764 last->next = cur;
5765 last = cur;
5766 }
5767 }
5768 SKIP_BLANKS;
5769 } while (RAW == '|');
5770 if (RAW != ')') {
5771 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5772 return(ret);
5773 }
5774 NEXT;
5775 return(ret);
5776 }
5777
5778 /**
5779 * xmlParseEnumeratedType:
5780 * @ctxt: an XML parser context
5781 * @tree: the enumeration tree built while parsing
5782 *
5783 * parse an Enumerated attribute type.
5784 *
5785 * [57] EnumeratedType ::= NotationType | Enumeration
5786 *
5787 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788 *
5789 *
5790 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791 */
5792
5793 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5794 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5795 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5796 SKIP(8);
5797 if (SKIP_BLANKS == 0) {
5798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 "Space required after 'NOTATION'\n");
5800 return(0);
5801 }
5802 *tree = xmlParseNotationType(ctxt);
5803 if (*tree == NULL) return(0);
5804 return(XML_ATTRIBUTE_NOTATION);
5805 }
5806 *tree = xmlParseEnumerationType(ctxt);
5807 if (*tree == NULL) return(0);
5808 return(XML_ATTRIBUTE_ENUMERATION);
5809 }
5810
5811 /**
5812 * xmlParseAttributeType:
5813 * @ctxt: an XML parser context
5814 * @tree: the enumeration tree built while parsing
5815 *
5816 * parse the Attribute list def for an element
5817 *
5818 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819 *
5820 * [55] StringType ::= 'CDATA'
5821 *
5822 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824 *
5825 * Validity constraints for attribute values syntax are checked in
5826 * xmlValidateAttributeValue()
5827 *
5828 * [ VC: ID ]
5829 * Values of type ID must match the Name production. A name must not
5830 * appear more than once in an XML document as a value of this type;
5831 * i.e., ID values must uniquely identify the elements which bear them.
5832 *
5833 * [ VC: One ID per Element Type ]
5834 * No element type may have more than one ID attribute specified.
5835 *
5836 * [ VC: ID Attribute Default ]
5837 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838 *
5839 * [ VC: IDREF ]
5840 * Values of type IDREF must match the Name production, and values
5841 * of type IDREFS must match Names; each IDREF Name must match the value
5842 * of an ID attribute on some element in the XML document; i.e. IDREF
5843 * values must match the value of some ID attribute.
5844 *
5845 * [ VC: Entity Name ]
5846 * Values of type ENTITY must match the Name production, values
5847 * of type ENTITIES must match Names; each Entity Name must match the
5848 * name of an unparsed entity declared in the DTD.
5849 *
5850 * [ VC: Name Token ]
5851 * Values of type NMTOKEN must match the Nmtoken production; values
5852 * of type NMTOKENS must match Nmtokens.
5853 *
5854 * Returns the attribute type
5855 */
5856 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5857 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858 SHRINK;
5859 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5860 SKIP(5);
5861 return(XML_ATTRIBUTE_CDATA);
5862 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5863 SKIP(6);
5864 return(XML_ATTRIBUTE_IDREFS);
5865 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5866 SKIP(5);
5867 return(XML_ATTRIBUTE_IDREF);
5868 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869 SKIP(2);
5870 return(XML_ATTRIBUTE_ID);
5871 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5872 SKIP(6);
5873 return(XML_ATTRIBUTE_ENTITY);
5874 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5875 SKIP(8);
5876 return(XML_ATTRIBUTE_ENTITIES);
5877 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5878 SKIP(8);
5879 return(XML_ATTRIBUTE_NMTOKENS);
5880 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5881 SKIP(7);
5882 return(XML_ATTRIBUTE_NMTOKEN);
5883 }
5884 return(xmlParseEnumeratedType(ctxt, tree));
5885 }
5886
5887 /**
5888 * xmlParseAttributeListDecl:
5889 * @ctxt: an XML parser context
5890 *
5891 * : parse the Attribute list def for an element
5892 *
5893 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894 *
5895 * [53] AttDef ::= S Name S AttType S DefaultDecl
5896 *
5897 */
5898 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5899 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5900 const xmlChar *elemName;
5901 const xmlChar *attrName;
5902 xmlEnumerationPtr tree;
5903
5904 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5905 int inputid = ctxt->input->id;
5906
5907 SKIP(9);
5908 if (SKIP_BLANKS == 0) {
5909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5910 "Space required after '<!ATTLIST'\n");
5911 }
5912 elemName = xmlParseName(ctxt);
5913 if (elemName == NULL) {
5914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 "ATTLIST: no name for Element\n");
5916 return;
5917 }
5918 SKIP_BLANKS;
5919 GROW;
5920 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5921 int type;
5922 int def;
5923 xmlChar *defaultValue = NULL;
5924
5925 GROW;
5926 tree = NULL;
5927 attrName = xmlParseName(ctxt);
5928 if (attrName == NULL) {
5929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 "ATTLIST: no name for Attribute\n");
5931 break;
5932 }
5933 GROW;
5934 if (SKIP_BLANKS == 0) {
5935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5936 "Space required after the attribute name\n");
5937 break;
5938 }
5939
5940 type = xmlParseAttributeType(ctxt, &tree);
5941 if (type <= 0) {
5942 break;
5943 }
5944
5945 GROW;
5946 if (SKIP_BLANKS == 0) {
5947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 "Space required after the attribute type\n");
5949 if (tree != NULL)
5950 xmlFreeEnumeration(tree);
5951 break;
5952 }
5953
5954 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 if (def <= 0) {
5956 if (defaultValue != NULL)
5957 xmlFree(defaultValue);
5958 if (tree != NULL)
5959 xmlFreeEnumeration(tree);
5960 break;
5961 }
5962 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5964
5965 GROW;
5966 if (RAW != '>') {
5967 if (SKIP_BLANKS == 0) {
5968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5969 "Space required after the attribute default value\n");
5970 if (defaultValue != NULL)
5971 xmlFree(defaultValue);
5972 if (tree != NULL)
5973 xmlFreeEnumeration(tree);
5974 break;
5975 }
5976 }
5977 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 (ctxt->sax->attributeDecl != NULL))
5979 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 type, def, defaultValue, tree);
5981 else if (tree != NULL)
5982 xmlFreeEnumeration(tree);
5983
5984 if ((ctxt->sax2) && (defaultValue != NULL) &&
5985 (def != XML_ATTRIBUTE_IMPLIED) &&
5986 (def != XML_ATTRIBUTE_REQUIRED)) {
5987 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 }
5989 if (ctxt->sax2) {
5990 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 }
5992 if (defaultValue != NULL)
5993 xmlFree(defaultValue);
5994 GROW;
5995 }
5996 if (RAW == '>') {
5997 if (inputid != ctxt->input->id) {
5998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999 "Attribute list declaration doesn't start and"
6000 " stop in the same entity\n");
6001 }
6002 NEXT;
6003 }
6004 }
6005 }
6006
6007 /**
6008 * xmlParseElementMixedContentDecl:
6009 * @ctxt: an XML parser context
6010 * @inputchk: the input used for the current entity, needed for boundary checks
6011 *
6012 * parse the declaration for a Mixed Element content
6013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6014 *
6015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016 * '(' S? '#PCDATA' S? ')'
6017 *
6018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019 *
6020 * [ VC: No Duplicate Types ]
6021 * The same name must not appear more than once in a single
6022 * mixed-content declaration.
6023 *
6024 * returns: the list of the xmlElementContentPtr describing the element choices
6025 */
6026 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6027 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6028 xmlElementContentPtr ret = NULL, cur = NULL, n;
6029 const xmlChar *elem = NULL;
6030
6031 GROW;
6032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6033 SKIP(7);
6034 SKIP_BLANKS;
6035 SHRINK;
6036 if (RAW == ')') {
6037 if (ctxt->input->id != inputchk) {
6038 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039 "Element content declaration doesn't start and"
6040 " stop in the same entity\n");
6041 }
6042 NEXT;
6043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6044 if (ret == NULL)
6045 return(NULL);
6046 if (RAW == '*') {
6047 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 NEXT;
6049 }
6050 return(ret);
6051 }
6052 if ((RAW == '(') || (RAW == '|')) {
6053 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6054 if (ret == NULL) return(NULL);
6055 }
6056 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6057 NEXT;
6058 if (elem == NULL) {
6059 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6060 if (ret == NULL) return(NULL);
6061 ret->c1 = cur;
6062 if (cur != NULL)
6063 cur->parent = ret;
6064 cur = ret;
6065 } else {
6066 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6067 if (n == NULL) return(NULL);
6068 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6069 if (n->c1 != NULL)
6070 n->c1->parent = n;
6071 cur->c2 = n;
6072 if (n != NULL)
6073 n->parent = cur;
6074 cur = n;
6075 }
6076 SKIP_BLANKS;
6077 elem = xmlParseName(ctxt);
6078 if (elem == NULL) {
6079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6080 "xmlParseElementMixedContentDecl : Name expected\n");
6081 xmlFreeDocElementContent(ctxt->myDoc, ret);
6082 return(NULL);
6083 }
6084 SKIP_BLANKS;
6085 GROW;
6086 }
6087 if ((RAW == ')') && (NXT(1) == '*')) {
6088 if (elem != NULL) {
6089 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6090 XML_ELEMENT_CONTENT_ELEMENT);
6091 if (cur->c2 != NULL)
6092 cur->c2->parent = cur;
6093 }
6094 if (ret != NULL)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 if (ctxt->input->id != inputchk) {
6097 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098 "Element content declaration doesn't start and"
6099 " stop in the same entity\n");
6100 }
6101 SKIP(2);
6102 } else {
6103 xmlFreeDocElementContent(ctxt->myDoc, ret);
6104 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6105 return(NULL);
6106 }
6107
6108 } else {
6109 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6110 }
6111 return(ret);
6112 }
6113
6114 /**
6115 * xmlParseElementChildrenContentDeclPriv:
6116 * @ctxt: an XML parser context
6117 * @inputchk: the input used for the current entity, needed for boundary checks
6118 * @depth: the level of recursion
6119 *
6120 * parse the declaration for a Mixed Element content
6121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6122 *
6123 *
6124 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6125 *
6126 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6127 *
6128 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6129 *
6130 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6131 *
6132 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6133 * TODO Parameter-entity replacement text must be properly nested
6134 * with parenthesized groups. That is to say, if either of the
6135 * opening or closing parentheses in a choice, seq, or Mixed
6136 * construct is contained in the replacement text for a parameter
6137 * entity, both must be contained in the same replacement text. For
6138 * interoperability, if a parameter-entity reference appears in a
6139 * choice, seq, or Mixed construct, its replacement text should not
6140 * be empty, and neither the first nor last non-blank character of
6141 * the replacement text should be a connector (| or ,).
6142 *
6143 * Returns the tree of xmlElementContentPtr describing the element
6144 * hierarchy.
6145 */
6146 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6147 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6148 int depth) {
6149 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6150 const xmlChar *elem;
6151 xmlChar type = 0;
6152
6153 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6154 (depth > 2048)) {
6155 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6156 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6157 depth);
6158 return(NULL);
6159 }
6160 SKIP_BLANKS;
6161 GROW;
6162 if (RAW == '(') {
6163 int inputid = ctxt->input->id;
6164
6165 /* Recurse on first child */
6166 NEXT;
6167 SKIP_BLANKS;
6168 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6169 depth + 1);
6170 SKIP_BLANKS;
6171 GROW;
6172 } else {
6173 elem = xmlParseName(ctxt);
6174 if (elem == NULL) {
6175 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6176 return(NULL);
6177 }
6178 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6179 if (cur == NULL) {
6180 xmlErrMemory(ctxt, NULL);
6181 return(NULL);
6182 }
6183 GROW;
6184 if (RAW == '?') {
6185 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6186 NEXT;
6187 } else if (RAW == '*') {
6188 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6189 NEXT;
6190 } else if (RAW == '+') {
6191 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6192 NEXT;
6193 } else {
6194 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6195 }
6196 GROW;
6197 }
6198 SKIP_BLANKS;
6199 SHRINK;
6200 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6201 /*
6202 * Each loop we parse one separator and one element.
6203 */
6204 if (RAW == ',') {
6205 if (type == 0) type = CUR;
6206
6207 /*
6208 * Detect "Name | Name , Name" error
6209 */
6210 else if (type != CUR) {
6211 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6212 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6213 type);
6214 if ((last != NULL) && (last != ret))
6215 xmlFreeDocElementContent(ctxt->myDoc, last);
6216 if (ret != NULL)
6217 xmlFreeDocElementContent(ctxt->myDoc, ret);
6218 return(NULL);
6219 }
6220 NEXT;
6221
6222 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6223 if (op == NULL) {
6224 if ((last != NULL) && (last != ret))
6225 xmlFreeDocElementContent(ctxt->myDoc, last);
6226 xmlFreeDocElementContent(ctxt->myDoc, ret);
6227 return(NULL);
6228 }
6229 if (last == NULL) {
6230 op->c1 = ret;
6231 if (ret != NULL)
6232 ret->parent = op;
6233 ret = cur = op;
6234 } else {
6235 cur->c2 = op;
6236 if (op != NULL)
6237 op->parent = cur;
6238 op->c1 = last;
6239 if (last != NULL)
6240 last->parent = op;
6241 cur =op;
6242 last = NULL;
6243 }
6244 } else if (RAW == '|') {
6245 if (type == 0) type = CUR;
6246
6247 /*
6248 * Detect "Name , Name | Name" error
6249 */
6250 else if (type != CUR) {
6251 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6252 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6253 type);
6254 if ((last != NULL) && (last != ret))
6255 xmlFreeDocElementContent(ctxt->myDoc, last);
6256 if (ret != NULL)
6257 xmlFreeDocElementContent(ctxt->myDoc, ret);
6258 return(NULL);
6259 }
6260 NEXT;
6261
6262 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6263 if (op == NULL) {
6264 if ((last != NULL) && (last != ret))
6265 xmlFreeDocElementContent(ctxt->myDoc, last);
6266 if (ret != NULL)
6267 xmlFreeDocElementContent(ctxt->myDoc, ret);
6268 return(NULL);
6269 }
6270 if (last == NULL) {
6271 op->c1 = ret;
6272 if (ret != NULL)
6273 ret->parent = op;
6274 ret = cur = op;
6275 } else {
6276 cur->c2 = op;
6277 if (op != NULL)
6278 op->parent = cur;
6279 op->c1 = last;
6280 if (last != NULL)
6281 last->parent = op;
6282 cur =op;
6283 last = NULL;
6284 }
6285 } else {
6286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6287 if ((last != NULL) && (last != ret))
6288 xmlFreeDocElementContent(ctxt->myDoc, last);
6289 if (ret != NULL)
6290 xmlFreeDocElementContent(ctxt->myDoc, ret);
6291 return(NULL);
6292 }
6293 GROW;
6294 SKIP_BLANKS;
6295 GROW;
6296 if (RAW == '(') {
6297 int inputid = ctxt->input->id;
6298 /* Recurse on second child */
6299 NEXT;
6300 SKIP_BLANKS;
6301 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6302 depth + 1);
6303 SKIP_BLANKS;
6304 } else {
6305 elem = xmlParseName(ctxt);
6306 if (elem == NULL) {
6307 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6308 if (ret != NULL)
6309 xmlFreeDocElementContent(ctxt->myDoc, ret);
6310 return(NULL);
6311 }
6312 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6313 if (last == NULL) {
6314 if (ret != NULL)
6315 xmlFreeDocElementContent(ctxt->myDoc, ret);
6316 return(NULL);
6317 }
6318 if (RAW == '?') {
6319 last->ocur = XML_ELEMENT_CONTENT_OPT;
6320 NEXT;
6321 } else if (RAW == '*') {
6322 last->ocur = XML_ELEMENT_CONTENT_MULT;
6323 NEXT;
6324 } else if (RAW == '+') {
6325 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6326 NEXT;
6327 } else {
6328 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6329 }
6330 }
6331 SKIP_BLANKS;
6332 GROW;
6333 }
6334 if ((cur != NULL) && (last != NULL)) {
6335 cur->c2 = last;
6336 if (last != NULL)
6337 last->parent = cur;
6338 }
6339 if (ctxt->input->id != inputchk) {
6340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6341 "Element content declaration doesn't start and stop in"
6342 " the same entity\n");
6343 }
6344 NEXT;
6345 if (RAW == '?') {
6346 if (ret != NULL) {
6347 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6348 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6349 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6350 else
6351 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6352 }
6353 NEXT;
6354 } else if (RAW == '*') {
6355 if (ret != NULL) {
6356 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6357 cur = ret;
6358 /*
6359 * Some normalization:
6360 * (a | b* | c?)* == (a | b | c)*
6361 */
6362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6363 if ((cur->c1 != NULL) &&
6364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6367 if ((cur->c2 != NULL) &&
6368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6371 cur = cur->c2;
6372 }
6373 }
6374 NEXT;
6375 } else if (RAW == '+') {
6376 if (ret != NULL) {
6377 int found = 0;
6378
6379 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6380 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6381 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6382 else
6383 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6384 /*
6385 * Some normalization:
6386 * (a | b*)+ == (a | b)*
6387 * (a | b?)+ == (a | b)*
6388 */
6389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6390 if ((cur->c1 != NULL) &&
6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 found = 1;
6395 }
6396 if ((cur->c2 != NULL) &&
6397 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6399 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 found = 1;
6401 }
6402 cur = cur->c2;
6403 }
6404 if (found)
6405 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6406 }
6407 NEXT;
6408 }
6409 return(ret);
6410 }
6411
6412 /**
6413 * xmlParseElementChildrenContentDecl:
6414 * @ctxt: an XML parser context
6415 * @inputchk: the input used for the current entity, needed for boundary checks
6416 *
6417 * parse the declaration for a Mixed Element content
6418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419 *
6420 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421 *
6422 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423 *
6424 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425 *
6426 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427 *
6428 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429 * TODO Parameter-entity replacement text must be properly nested
6430 * with parenthesized groups. That is to say, if either of the
6431 * opening or closing parentheses in a choice, seq, or Mixed
6432 * construct is contained in the replacement text for a parameter
6433 * entity, both must be contained in the same replacement text. For
6434 * interoperability, if a parameter-entity reference appears in a
6435 * choice, seq, or Mixed construct, its replacement text should not
6436 * be empty, and neither the first nor last non-blank character of
6437 * the replacement text should be a connector (| or ,).
6438 *
6439 * Returns the tree of xmlElementContentPtr describing the element
6440 * hierarchy.
6441 */
6442 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6443 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444 /* stub left for API/ABI compat */
6445 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446 }
6447
6448 /**
6449 * xmlParseElementContentDecl:
6450 * @ctxt: an XML parser context
6451 * @name: the name of the element being defined.
6452 * @result: the Element Content pointer will be stored here if any
6453 *
6454 * parse the declaration for an Element content either Mixed or Children,
6455 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6456 *
6457 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458 *
6459 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460 */
6461
6462 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6463 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6464 xmlElementContentPtr *result) {
6465
6466 xmlElementContentPtr tree = NULL;
6467 int inputid = ctxt->input->id;
6468 int res;
6469
6470 *result = NULL;
6471
6472 if (RAW != '(') {
6473 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6474 "xmlParseElementContentDecl : %s '(' expected\n", name);
6475 return(-1);
6476 }
6477 NEXT;
6478 GROW;
6479 if (ctxt->instate == XML_PARSER_EOF)
6480 return(-1);
6481 SKIP_BLANKS;
6482 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6483 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6484 res = XML_ELEMENT_TYPE_MIXED;
6485 } else {
6486 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6487 res = XML_ELEMENT_TYPE_ELEMENT;
6488 }
6489 SKIP_BLANKS;
6490 *result = tree;
6491 return(res);
6492 }
6493
6494 /**
6495 * xmlParseElementDecl:
6496 * @ctxt: an XML parser context
6497 *
6498 * parse an Element declaration.
6499 *
6500 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501 *
6502 * [ VC: Unique Element Type Declaration ]
6503 * No element type may be declared more than once
6504 *
6505 * Returns the type of the element, or -1 in case of error
6506 */
6507 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6508 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6509 const xmlChar *name;
6510 int ret = -1;
6511 xmlElementContentPtr content = NULL;
6512
6513 /* GROW; done in the caller */
6514 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6515 int inputid = ctxt->input->id;
6516
6517 SKIP(9);
6518 if (SKIP_BLANKS == 0) {
6519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 "Space required after 'ELEMENT'\n");
6521 return(-1);
6522 }
6523 name = xmlParseName(ctxt);
6524 if (name == NULL) {
6525 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 "xmlParseElementDecl: no name for Element\n");
6527 return(-1);
6528 }
6529 if (SKIP_BLANKS == 0) {
6530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 "Space required after the element name\n");
6532 }
6533 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6534 SKIP(5);
6535 /*
6536 * Element must always be empty.
6537 */
6538 ret = XML_ELEMENT_TYPE_EMPTY;
6539 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 (NXT(2) == 'Y')) {
6541 SKIP(3);
6542 /*
6543 * Element is a generic container.
6544 */
6545 ret = XML_ELEMENT_TYPE_ANY;
6546 } else if (RAW == '(') {
6547 ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 } else {
6549 /*
6550 * [ WFC: PEs in Internal Subset ] error handling.
6551 */
6552 if ((RAW == '%') && (ctxt->external == 0) &&
6553 (ctxt->inputNr == 1)) {
6554 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6555 "PEReference: forbidden within markup decl in internal subset\n");
6556 } else {
6557 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6558 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559 }
6560 return(-1);
6561 }
6562
6563 SKIP_BLANKS;
6564
6565 if (RAW != '>') {
6566 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6567 if (content != NULL) {
6568 xmlFreeDocElementContent(ctxt->myDoc, content);
6569 }
6570 } else {
6571 if (inputid != ctxt->input->id) {
6572 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6573 "Element declaration doesn't start and stop in"
6574 " the same entity\n");
6575 }
6576
6577 NEXT;
6578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6579 (ctxt->sax->elementDecl != NULL)) {
6580 if (content != NULL)
6581 content->parent = NULL;
6582 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 content);
6584 if ((content != NULL) && (content->parent == NULL)) {
6585 /*
6586 * this is a trick: if xmlAddElementDecl is called,
6587 * instead of copying the full tree it is plugged directly
6588 * if called from the parser. Avoid duplicating the
6589 * interfaces or change the API/ABI
6590 */
6591 xmlFreeDocElementContent(ctxt->myDoc, content);
6592 }
6593 } else if (content != NULL) {
6594 xmlFreeDocElementContent(ctxt->myDoc, content);
6595 }
6596 }
6597 }
6598 return(ret);
6599 }
6600
6601 /**
6602 * xmlParseConditionalSections
6603 * @ctxt: an XML parser context
6604 *
6605 * [61] conditionalSect ::= includeSect | ignoreSect
6606 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6607 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610 */
6611
6612 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6613 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6614 int id = ctxt->input->id;
6615
6616 SKIP(3);
6617 SKIP_BLANKS;
6618 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6619 SKIP(7);
6620 SKIP_BLANKS;
6621 if (RAW != '[') {
6622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6623 xmlHaltParser(ctxt);
6624 return;
6625 } else {
6626 if (ctxt->input->id != id) {
6627 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 "All markup of the conditional section is not"
6629 " in the same entity\n");
6630 }
6631 NEXT;
6632 }
6633 if (xmlParserDebugEntities) {
6634 if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 xmlGenericError(xmlGenericErrorContext,
6636 "%s(%d): ", ctxt->input->filename,
6637 ctxt->input->line);
6638 xmlGenericError(xmlGenericErrorContext,
6639 "Entering INCLUDE Conditional Section\n");
6640 }
6641
6642 SKIP_BLANKS;
6643 GROW;
6644 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6646 const xmlChar *check = CUR_PTR;
6647 unsigned int cons = ctxt->input->consumed;
6648
6649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 xmlParseConditionalSections(ctxt);
6651 } else
6652 xmlParseMarkupDecl(ctxt);
6653
6654 SKIP_BLANKS;
6655 GROW;
6656
6657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6658 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6659 xmlHaltParser(ctxt);
6660 break;
6661 }
6662 }
6663 if (xmlParserDebugEntities) {
6664 if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 xmlGenericError(xmlGenericErrorContext,
6666 "%s(%d): ", ctxt->input->filename,
6667 ctxt->input->line);
6668 xmlGenericError(xmlGenericErrorContext,
6669 "Leaving INCLUDE Conditional Section\n");
6670 }
6671
6672 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6673 int state;
6674 xmlParserInputState instate;
6675 int depth = 0;
6676
6677 SKIP(6);
6678 SKIP_BLANKS;
6679 if (RAW != '[') {
6680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6681 xmlHaltParser(ctxt);
6682 return;
6683 } else {
6684 if (ctxt->input->id != id) {
6685 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 "All markup of the conditional section is not"
6687 " in the same entity\n");
6688 }
6689 NEXT;
6690 }
6691 if (xmlParserDebugEntities) {
6692 if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 xmlGenericError(xmlGenericErrorContext,
6694 "%s(%d): ", ctxt->input->filename,
6695 ctxt->input->line);
6696 xmlGenericError(xmlGenericErrorContext,
6697 "Entering IGNORE Conditional Section\n");
6698 }
6699
6700 /*
6701 * Parse up to the end of the conditional section
6702 * But disable SAX event generating DTD building in the meantime
6703 */
6704 state = ctxt->disableSAX;
6705 instate = ctxt->instate;
6706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6707 ctxt->instate = XML_PARSER_IGNORE;
6708
6709 while (((depth >= 0) && (RAW != 0)) &&
6710 (ctxt->instate != XML_PARSER_EOF)) {
6711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 depth++;
6713 SKIP(3);
6714 continue;
6715 }
6716 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 if (--depth >= 0) SKIP(3);
6718 continue;
6719 }
6720 NEXT;
6721 continue;
6722 }
6723
6724 ctxt->disableSAX = state;
6725 ctxt->instate = instate;
6726
6727 if (xmlParserDebugEntities) {
6728 if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 xmlGenericError(xmlGenericErrorContext,
6730 "%s(%d): ", ctxt->input->filename,
6731 ctxt->input->line);
6732 xmlGenericError(xmlGenericErrorContext,
6733 "Leaving IGNORE Conditional Section\n");
6734 }
6735
6736 } else {
6737 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6738 xmlHaltParser(ctxt);
6739 return;
6740 }
6741
6742 if (RAW == 0)
6743 SHRINK;
6744
6745 if (RAW == 0) {
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6747 } else {
6748 if (ctxt->input->id != id) {
6749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "All markup of the conditional section is not in"
6751 " the same entity\n");
6752 }
6753 if ((ctxt-> instate != XML_PARSER_EOF) &&
6754 ((ctxt->input->cur + 3) <= ctxt->input->end))
6755 SKIP(3);
6756 }
6757 }
6758
6759 /**
6760 * xmlParseMarkupDecl:
6761 * @ctxt: an XML parser context
6762 *
6763 * parse Markup declarations
6764 *
6765 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766 * NotationDecl | PI | Comment
6767 *
6768 * [ VC: Proper Declaration/PE Nesting ]
6769 * Parameter-entity replacement text must be properly nested with
6770 * markup declarations. That is to say, if either the first character
6771 * or the last character of a markup declaration (markupdecl above) is
6772 * contained in the replacement text for a parameter-entity reference,
6773 * both must be contained in the same replacement text.
6774 *
6775 * [ WFC: PEs in Internal Subset ]
6776 * In the internal DTD subset, parameter-entity references can occur
6777 * only where markup declarations can occur, not within markup declarations.
6778 * (This does not apply to references that occur in external parameter
6779 * entities or to the external subset.)
6780 */
6781 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6782 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783 GROW;
6784 if (CUR == '<') {
6785 if (NXT(1) == '!') {
6786 switch (NXT(2)) {
6787 case 'E':
6788 if (NXT(3) == 'L')
6789 xmlParseElementDecl(ctxt);
6790 else if (NXT(3) == 'N')
6791 xmlParseEntityDecl(ctxt);
6792 break;
6793 case 'A':
6794 xmlParseAttributeListDecl(ctxt);
6795 break;
6796 case 'N':
6797 xmlParseNotationDecl(ctxt);
6798 break;
6799 case '-':
6800 xmlParseComment(ctxt);
6801 break;
6802 default:
6803 /* there is an error but it will be detected later */
6804 break;
6805 }
6806 } else if (NXT(1) == '?') {
6807 xmlParsePI(ctxt);
6808 }
6809 }
6810
6811 /*
6812 * detect requirement to exit there and act accordingly
6813 * and avoid having instate overriden later on
6814 */
6815 if (ctxt->instate == XML_PARSER_EOF)
6816 return;
6817
6818 /*
6819 * Conditional sections are allowed from entities included
6820 * by PE References in the internal subset.
6821 */
6822 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 }
6826 }
6827
6828 ctxt->instate = XML_PARSER_DTD;
6829 }
6830
6831 /**
6832 * xmlParseTextDecl:
6833 * @ctxt: an XML parser context
6834 *
6835 * parse an XML declaration header for external entities
6836 *
6837 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6838 */
6839
6840 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6841 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842 xmlChar *version;
6843 const xmlChar *encoding;
6844
6845 /*
6846 * We know that '<?xml' is here.
6847 */
6848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6849 SKIP(5);
6850 } else {
6851 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6852 return;
6853 }
6854
6855 if (SKIP_BLANKS == 0) {
6856 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 "Space needed after '<?xml'\n");
6858 }
6859
6860 /*
6861 * We may have the VersionInfo here.
6862 */
6863 version = xmlParseVersionInfo(ctxt);
6864 if (version == NULL)
6865 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6866 else {
6867 if (SKIP_BLANKS == 0) {
6868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 "Space needed here\n");
6870 }
6871 }
6872 ctxt->input->version = version;
6873
6874 /*
6875 * We must have the encoding declaration
6876 */
6877 encoding = xmlParseEncodingDecl(ctxt);
6878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 /*
6880 * The XML REC instructs us to stop parsing right here
6881 */
6882 return;
6883 }
6884 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 "Missing encoding in text declaration\n");
6887 }
6888
6889 SKIP_BLANKS;
6890 if ((RAW == '?') && (NXT(1) == '>')) {
6891 SKIP(2);
6892 } else if (RAW == '>') {
6893 /* Deprecated old WD ... */
6894 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6895 NEXT;
6896 } else {
6897 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6898 MOVETO_ENDTAG(CUR_PTR);
6899 NEXT;
6900 }
6901 }
6902
6903 /**
6904 * xmlParseExternalSubset:
6905 * @ctxt: an XML parser context
6906 * @ExternalID: the external identifier
6907 * @SystemID: the system identifier (or URL)
6908 *
6909 * parse Markup declarations from an external subset
6910 *
6911 * [30] extSubset ::= textDecl? extSubsetDecl
6912 *
6913 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914 */
6915 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6916 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917 const xmlChar *SystemID) {
6918 xmlDetectSAX2(ctxt);
6919 GROW;
6920
6921 if ((ctxt->encoding == NULL) &&
6922 (ctxt->input->end - ctxt->input->cur >= 4)) {
6923 xmlChar start[4];
6924 xmlCharEncoding enc;
6925
6926 start[0] = RAW;
6927 start[1] = NXT(1);
6928 start[2] = NXT(2);
6929 start[3] = NXT(3);
6930 enc = xmlDetectCharEncoding(start, 4);
6931 if (enc != XML_CHAR_ENCODING_NONE)
6932 xmlSwitchEncoding(ctxt, enc);
6933 }
6934
6935 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6936 xmlParseTextDecl(ctxt);
6937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 /*
6939 * The XML REC instructs us to stop parsing right here
6940 */
6941 xmlHaltParser(ctxt);
6942 return;
6943 }
6944 }
6945 if (ctxt->myDoc == NULL) {
6946 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6947 if (ctxt->myDoc == NULL) {
6948 xmlErrMemory(ctxt, "New Doc failed");
6949 return;
6950 }
6951 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6952 }
6953 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955
6956 ctxt->instate = XML_PARSER_DTD;
6957 ctxt->external = 1;
6958 SKIP_BLANKS;
6959 while (((RAW == '<') && (NXT(1) == '?')) ||
6960 ((RAW == '<') && (NXT(1) == '!')) ||
6961 (RAW == '%')) {
6962 const xmlChar *check = CUR_PTR;
6963 unsigned int cons = ctxt->input->consumed;
6964
6965 GROW;
6966 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 xmlParseConditionalSections(ctxt);
6968 } else
6969 xmlParseMarkupDecl(ctxt);
6970 SKIP_BLANKS;
6971
6972 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6973 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6974 break;
6975 }
6976 }
6977
6978 if (RAW != 0) {
6979 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6980 }
6981
6982 }
6983
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse and handle a reference in content. Depending on the SAX
 * interface this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, or if there is no reference()
 * callback, or if the parser was asked to switch to that mode.
 *
 * Nothing is done when the current character is not '&'. For a general
 * entity, the first reference triggers the parsing of the entity
 * content; the resulting node list is then either attached to the
 * entity, plugged into the tree under ctxt->node, or copied, depending
 * on the parsing mode.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    /* Value of ent->checked on entry; cleared once parsed in this call. */
    int was_checked;
    /* Nodes produced by parsing the entity content during this call. */
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[10];
        /* Remember whether the reference was written in hexadecimal. */
        int hex = NXT(2);
        int value = xmlParseCharRef(ctxt);

        if (value == 0)
            return;
        if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
            /*
             * So we are using non-UTF-8 buffers
             * Check that the char fits on 8 bits, if not
             * generate a CharRef.
             */
            if (value <= 0xFF) {
                out[0] = value;
                out[1] = 0;
                if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->characters(ctxt->userData, out, 1);
            } else {
                /* Rebuild the reference text in the notation it used. */
                if ((hex == 'x') || (hex == 'X'))
                    snprintf((char *)out, sizeof(out), "#x%X", value);
                else
                    snprintf((char *)out, sizeof(out), "#%d", value);
                if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->reference(ctxt->userData, out);
            }
        } else {
            /*
             * Just encode the value in UTF-8
             */
            COPY_BUF(0 ,out, i, value);
            out[i] = 0;
            if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
                (!ctxt->disableSAX))
                ctxt->sax->characters(ctxt->userData, out, i);
        }
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    /* Do not expand anything once the document is not well formed. */
    if (!ctxt->wellFormed)
        return;
    was_checked = ent->checked;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity triggers a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     */
    if (((ent->checked == 0) ||
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        /* Entity counter before the expansion, to measure its cost. */
        unsigned long oldnbent = ctxt->nbentities;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Store the number of entities needing parsing for this entity
         * content and do checkings. The count is kept doubled so that
         * the low bit can flag a content containing '<'.
         */
        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
        if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
            ent->checked |= 1;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
                 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
                (ent->children == NULL)) {
                ent->children = list;
                if (ctxt->replaceEntities) {
                    /*
                     * Prune it directly in the generated document
                     * except for single text nodes.
                     */
                    if (((list->type == XML_TEXT_NODE) &&
                         (list->next == NULL)) ||
                        (ctxt->parseMode == XML_PARSE_READER)) {
                        /* The entity keeps the nodes; list is cleared. */
                        list->parent = (xmlNodePtr) ent;
                        list = NULL;
                        ent->owner = 1;
                    } else {
                        /*
                         * The nodes are re-parented to the current node
                         * and the entity is marked as not owning them;
                         * list is reset to the head for the code below.
                         */
                        ent->owner = 0;
                        while (list != NULL) {
                            list->parent = (xmlNodePtr) ctxt->node;
                            list->doc = ctxt->myDoc;
                            if (list->next == NULL)
                                ent->last = list;
                            list = list->next;
                        }
                        list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                        if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                            xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
                    }
                } else {
                    /*
                     * No substitution: the nodes stay attached to the
                     * entity. The walk leaves list set to NULL.
                     */
                    ent->owner = 1;
                    while (list != NULL) {
                        list->parent = (xmlNodePtr) ent;
                        xmlSetTreeDoc(list, ent->doc);
                        if (list->next == NULL)
                            ent->last = list;
                        list = list->next;
                    }
                }
            } else {
                xmlFreeNodeList(list);
                list = NULL;
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* Truncate the content of the entity which failed to parse. */
            if (ent->content != NULL)
                ent->content[0] = 0;
            xmlParserEntityCheck(ctxt, 0, ent, 0);
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }
        if (ent->checked == 0)
            ent->checked = 2;

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    } else if (ent->checked != 1) {
        /* Already checked: account for the entities it expands to. */
        ctxt->nbentities += ent->checked / 2;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * through parsing for first checking go through the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * The entity has children from here on. When entities are not
     * substituted, only report the reference to the application.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if ((ctxt->replaceEntities) || (ent->children == NULL)) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if ((ctxt->node != NULL) && (ent->children != NULL)) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                if (ctxt->parseMode == XML_PARSE_READER)
                    ent->owner = 1;
                 */

                /* Add a copy of each entity child under ctxt->node. */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * We are copying here, make sure there is no abuse
                 */
                ctxt->sizeentcopy += ent->length + 5;
                if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
                    return;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* Detach the original before it is moved. */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /*
                         * The copy goes to the entity while the
                         * original node goes to the document tree.
                         */
                        xmlAddChild((xmlNodePtr) ent, nw);
                        xmlAddChild(ctxt->node, cur);
                    }
                    if (cur == last)
                        break;
                    cur = next;
                }
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                  xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}
7418
7419 /**
7420 * xmlParseEntityRef:
7421 * @ctxt: an XML parser context
7422 *
7423 * parse ENTITY references declarations
7424 *
7425 * [68] EntityRef ::= '&' Name ';'
7426 *
7427 * [ WFC: Entity Declared ]
7428 * In a document without any DTD, a document with only an internal DTD
7429 * subset which contains no parameter entity references, or a document
7430 * with "standalone='yes'", the Name given in the entity reference
7431 * must match that in an entity declaration, except that well-formed
7432 * documents need not declare any of the following entities: amp, lt,
7433 * gt, apos, quot. The declaration of a parameter entity must precede
7434 * any reference to it. Similarly, the declaration of a general entity
7435 * must precede any reference to it which appears in a default value in an
7436 * attribute-list declaration. Note that if entities are declared in the
7437 * external subset or in external parameter entities, a non-validating
7438 * processor is not obligated to read and process their declarations;
7439 * for such documents, the rule that an entity must be declared is a
7440 * well-formedness constraint only if standalone='yes'.
7441 *
7442 * [ WFC: Parsed Entity ]
7443 * An entity reference must not contain the name of an unparsed entity
7444 *
7445 * Returns the xmlEntityPtr if found, or NULL otherwise.
7446 */
7447 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7448 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7449 const xmlChar *name;
7450 xmlEntityPtr ent = NULL;
7451
7452 GROW;
7453 if (ctxt->instate == XML_PARSER_EOF)
7454 return(NULL);
7455
7456 if (RAW != '&')
7457 return(NULL);
7458 NEXT;
7459 name = xmlParseName(ctxt);
7460 if (name == NULL) {
7461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 "xmlParseEntityRef: no name\n");
7463 return(NULL);
7464 }
7465 if (RAW != ';') {
7466 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 return(NULL);
7468 }
7469 NEXT;
7470
7471 /*
7472 * Predefined entities override any extra definition
7473 */
7474 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475 ent = xmlGetPredefinedEntity(name);
7476 if (ent != NULL)
7477 return(ent);
7478 }
7479
7480 /*
7481 * Increase the number of entity references parsed
7482 */
7483 ctxt->nbentities++;
7484
7485 /*
7486 * Ask first SAX for entity resolution, otherwise try the
7487 * entities which may have stored in the parser context.
7488 */
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
7492 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7493 (ctxt->options & XML_PARSE_OLDSAX))
7494 ent = xmlGetPredefinedEntity(name);
7495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->userData==ctxt)) {
7497 ent = xmlSAX2GetEntity(ctxt, name);
7498 }
7499 }
7500 if (ctxt->instate == XML_PARSER_EOF)
7501 return(NULL);
7502 /*
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an
7505 * internal DTD subset which contains no parameter entity
7506 * references, or a document with "standalone='yes'", the
7507 * Name given in the entity reference must match that in an
7508 * entity declaration, except that well-formed documents
7509 * need not declare any of the following entities: amp, lt,
7510 * gt, apos, quot.
7511 * The declaration of a parameter entity must precede any
7512 * reference to it.
7513 * Similarly, the declaration of a general entity must
7514 * precede any reference to it which appears in a default
7515 * value in an attribute-list declaration. Note that if
7516 * entities are declared in the external subset or in
7517 * external parameter entities, a non-validating processor
7518 * is not obligated to read and process their declarations;
7519 * for such documents, the rule that an entity must be
7520 * declared is a well-formedness constraint only if
7521 * standalone='yes'.
7522 */
7523 if (ent == NULL) {
7524 if ((ctxt->standalone == 1) ||
7525 ((ctxt->hasExternalSubset == 0) &&
7526 (ctxt->hasPErefs == 0))) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n", name);
7529 } else {
7530 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 if ((ctxt->inSubset == 0) &&
7533 (ctxt->sax != NULL) &&
7534 (ctxt->sax->reference != NULL)) {
7535 ctxt->sax->reference(ctxt->userData, name);
7536 }
7537 }
7538 xmlParserEntityCheck(ctxt, 0, ent, 0);
7539 ctxt->valid = 0;
7540 }
7541
7542 /*
7543 * [ WFC: Parsed Entity ]
7544 * An entity reference must not contain the name of an
7545 * unparsed entity
7546 */
7547 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 "Entity reference to unparsed entity %s\n", name);
7550 }
7551
7552 /*
7553 * [ WFC: No External Entity References ]
7554 * Attribute values cannot contain direct or indirect
7555 * entity references to external entities.
7556 */
7557 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 "Attribute references external entity '%s'\n", name);
7561 }
7562 /*
7563 * [ WFC: No < in Attribute Values ]
7564 * The replacement text of any entity referred to directly or
7565 * indirectly in an attribute value (other than "<") must
7566 * not contain a <.
7567 */
7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7569 (ent != NULL) &&
7570 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7571 if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7573 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 "'<' in entity '%s' is not allowed in attributes values\n", name);
7575 }
7576 }
7577
7578 /*
7579 * Internal check, no parameter entities here ...
7580 */
7581 else {
7582 switch (ent->etype) {
7583 case XML_INTERNAL_PARAMETER_ENTITY:
7584 case XML_EXTERNAL_PARAMETER_ENTITY:
7585 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 "Attempt to reference the parameter entity '%s'\n",
7587 name);
7588 break;
7589 default:
7590 break;
7591 }
7592 }
7593
7594 /*
7595 * [ WFC: No Recursion ]
7596 * A parsed entity must not contain a recursive reference
7597 * to itself, either directly or indirectly.
7598 * Done somewhere else
7599 */
7600 return(ent);
7601 }
7602
7603 /**
7604 * xmlParseStringEntityRef:
7605 * @ctxt: an XML parser context
7606 * @str: a pointer to an index in the string
7607 *
7608 * parse ENTITY references declarations, but this version parses it from
7609 * a string value.
7610 *
7611 * [68] EntityRef ::= '&' Name ';'
7612 *
7613 * [ WFC: Entity Declared ]
7614 * In a document without any DTD, a document with only an internal DTD
7615 * subset which contains no parameter entity references, or a document
7616 * with "standalone='yes'", the Name given in the entity reference
7617 * must match that in an entity declaration, except that well-formed
7618 * documents need not declare any of the following entities: amp, lt,
7619 * gt, apos, quot. The declaration of a parameter entity must precede
7620 * any reference to it. Similarly, the declaration of a general entity
7621 * must precede any reference to it which appears in a default value in an
7622 * attribute-list declaration. Note that if entities are declared in the
7623 * external subset or in external parameter entities, a non-validating
7624 * processor is not obligated to read and process their declarations;
7625 * for such documents, the rule that an entity must be declared is a
7626 * well-formedness constraint only if standalone='yes'.
7627 *
7628 * [ WFC: Parsed Entity ]
7629 * An entity reference must not contain the name of an unparsed entity
7630 *
7631 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632 * is updated to the current location in the string.
7633 */
7634 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7635 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636 xmlChar *name;
7637 const xmlChar *ptr;
7638 xmlChar cur;
7639 xmlEntityPtr ent = NULL;
7640
7641 if ((str == NULL) || (*str == NULL))
7642 return(NULL);
7643 ptr = *str;
7644 cur = *ptr;
7645 if (cur != '&')
7646 return(NULL);
7647
7648 ptr++;
7649 name = xmlParseStringName(ctxt, &ptr);
7650 if (name == NULL) {
7651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 "xmlParseStringEntityRef: no name\n");
7653 *str = ptr;
7654 return(NULL);
7655 }
7656 if (*ptr != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658 xmlFree(name);
7659 *str = ptr;
7660 return(NULL);
7661 }
7662 ptr++;
7663
7664
7665 /*
7666 * Predefined entities override any extra definition
7667 */
7668 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669 ent = xmlGetPredefinedEntity(name);
7670 if (ent != NULL) {
7671 xmlFree(name);
7672 *str = ptr;
7673 return(ent);
7674 }
7675 }
7676
7677 /*
7678 * Increate the number of entity references parsed
7679 */
7680 ctxt->nbentities++;
7681
7682 /*
7683 * Ask first SAX for entity resolution, otherwise try the
7684 * entities which may have stored in the parser context.
7685 */
7686 if (ctxt->sax != NULL) {
7687 if (ctxt->sax->getEntity != NULL)
7688 ent = ctxt->sax->getEntity(ctxt->userData, name);
7689 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 ent = xmlGetPredefinedEntity(name);
7691 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 ent = xmlSAX2GetEntity(ctxt, name);
7693 }
7694 }
7695 if (ctxt->instate == XML_PARSER_EOF) {
7696 xmlFree(name);
7697 return(NULL);
7698 }
7699
7700 /*
7701 * [ WFC: Entity Declared ]
7702 * In a document without any DTD, a document with only an
7703 * internal DTD subset which contains no parameter entity
7704 * references, or a document with "standalone='yes'", the
7705 * Name given in the entity reference must match that in an
7706 * entity declaration, except that well-formed documents
7707 * need not declare any of the following entities: amp, lt,
7708 * gt, apos, quot.
7709 * The declaration of a parameter entity must precede any
7710 * reference to it.
7711 * Similarly, the declaration of a general entity must
7712 * precede any reference to it which appears in a default
7713 * value in an attribute-list declaration. Note that if
7714 * entities are declared in the external subset or in
7715 * external parameter entities, a non-validating processor
7716 * is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be
7718 * declared is a well-formedness constraint only if
7719 * standalone='yes'.
7720 */
7721 if (ent == NULL) {
7722 if ((ctxt->standalone == 1) ||
7723 ((ctxt->hasExternalSubset == 0) &&
7724 (ctxt->hasPErefs == 0))) {
7725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 "Entity '%s' not defined\n", name);
7727 } else {
7728 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n",
7730 name);
7731 }
7732 xmlParserEntityCheck(ctxt, 0, ent, 0);
7733 /* TODO ? check regressions ctxt->valid = 0; */
7734 }
7735
7736 /*
7737 * [ WFC: Parsed Entity ]
7738 * An entity reference must not contain the name of an
7739 * unparsed entity
7740 */
7741 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 "Entity reference to unparsed entity %s\n", name);
7744 }
7745
7746 /*
7747 * [ WFC: No External Entity References ]
7748 * Attribute values cannot contain direct or indirect
7749 * entity references to external entities.
7750 */
7751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 "Attribute references external entity '%s'\n", name);
7755 }
7756 /*
7757 * [ WFC: No < in Attribute Values ]
7758 * The replacement text of any entity referred to directly or
7759 * indirectly in an attribute value (other than "<") must
7760 * not contain a <.
7761 */
7762 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 (ent != NULL) && (ent->content != NULL) &&
7764 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7765 (xmlStrchr(ent->content, '<'))) {
7766 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767 "'<' in entity '%s' is not allowed in attributes values\n",
7768 name);
7769 }
7770
7771 /*
7772 * Internal check, no parameter entities here ...
7773 */
7774 else {
7775 switch (ent->etype) {
7776 case XML_INTERNAL_PARAMETER_ENTITY:
7777 case XML_EXTERNAL_PARAMETER_ENTITY:
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 "Attempt to reference the parameter entity '%s'\n",
7780 name);
7781 break;
7782 default:
7783 break;
7784 }
7785 }
7786
7787 /*
7788 * [ WFC: No Recursion ]
7789 * A parsed entity must not contain a recursive reference
7790 * to itself, either directly or indirectly.
7791 * Done somewhere else
7792 */
7793
7794 xmlFree(name);
7795 *str = ptr;
7796 return(ent);
7797 }
7798
7799 /**
7800 * xmlParsePEReference:
7801 * @ctxt: an XML parser context
7802 *
7803 * parse PEReference declarations
7804 * The entity content is handled directly by pushing it's content as
7805 * a new input stream.
7806 *
7807 * [69] PEReference ::= '%' Name ';'
7808 *
7809 * [ WFC: No Recursion ]
7810 * A parsed entity must not contain a recursive
7811 * reference to itself, either directly or indirectly.
7812 *
7813 * [ WFC: Entity Declared ]
7814 * In a document without any DTD, a document with only an internal DTD
7815 * subset which contains no parameter entity references, or a document
7816 * with "standalone='yes'", ... ... The declaration of a parameter
7817 * entity must precede any reference to it...
7818 *
7819 * [ VC: Entity Declared ]
7820 * In a document with an external subset or external parameter entities
7821 * with "standalone='no'", ... ... The declaration of a parameter entity
7822 * must precede any reference to it...
7823 *
7824 * [ WFC: In DTD ]
7825 * Parameter-entity references may only appear in the DTD.
7826 * NOTE: misleading but this is handled.
7827 */
7828 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7829 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830 {
7831 const xmlChar *name;
7832 xmlEntityPtr entity = NULL;
7833 xmlParserInputPtr input;
7834
7835 if (RAW != '%')
7836 return;
7837 NEXT;
7838 name = xmlParseName(ctxt);
7839 if (name == NULL) {
7840 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7841 return;
7842 }
7843 if (xmlParserDebugEntities)
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PEReference: %s\n", name);
7846 if (RAW != ';') {
7847 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7848 return;
7849 }
7850
7851 NEXT;
7852
7853 /*
7854 * Increate the number of entity references parsed
7855 */
7856 ctxt->nbentities++;
7857
7858 /*
7859 * Request the entity from SAX
7860 */
7861 if ((ctxt->sax != NULL) &&
7862 (ctxt->sax->getParameterEntity != NULL))
7863 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864 if (ctxt->instate == XML_PARSER_EOF)
7865 return;
7866 if (entity == NULL) {
7867 /*
7868 * [ WFC: Entity Declared ]
7869 * In a document without any DTD, a document with only an
7870 * internal DTD subset which contains no parameter entity
7871 * references, or a document with "standalone='yes'", ...
7872 * ... The declaration of a parameter entity must precede
7873 * any reference to it...
7874 */
7875 if ((ctxt->standalone == 1) ||
7876 ((ctxt->hasExternalSubset == 0) &&
7877 (ctxt->hasPErefs == 0))) {
7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 "PEReference: %%%s; not found\n",
7880 name);
7881 } else {
7882 /*
7883 * [ VC: Entity Declared ]
7884 * In a document with an external subset or external
7885 * parameter entities with "standalone='no'", ...
7886 * ... The declaration of a parameter entity must
7887 * precede any reference to it...
7888 */
7889 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891 "PEReference: %%%s; not found\n",
7892 name, NULL);
7893 } else
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7896 name, NULL);
7897 ctxt->valid = 0;
7898 }
7899 xmlParserEntityCheck(ctxt, 0, NULL, 0);
7900 } else {
7901 /*
7902 * Internal checking in case the entity quest barfed
7903 */
7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "Internal: %%%s; is not a parameter entity\n",
7908 name, NULL);
7909 } else {
7910 xmlChar start[4];
7911 xmlCharEncoding enc;
7912
7913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 (ctxt->replaceEntities == 0) &&
7919 (ctxt->validate == 0))
7920 return;
7921
7922 input = xmlNewEntityInputStream(ctxt, entity);
7923 if (xmlPushInput(ctxt, input) < 0) {
7924 xmlFreeInputStream(input);
7925 return;
7926 }
7927
7928 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929 /*
7930 * Get the 4 first bytes and decode the charset
7931 * if enc != XML_CHAR_ENCODING_NONE
7932 * plug some encoding conversion routines.
7933 * Note that, since we may have some non-UTF8
7934 * encoding (like UTF16, bug 135229), the 'length'
7935 * is not known, but we can calculate based upon
7936 * the amount of data in the buffer.
7937 */
7938 GROW
7939 if (ctxt->instate == XML_PARSER_EOF)
7940 return;
7941 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942 start[0] = RAW;
7943 start[1] = NXT(1);
7944 start[2] = NXT(2);
7945 start[3] = NXT(3);
7946 enc = xmlDetectCharEncoding(start, 4);
7947 if (enc != XML_CHAR_ENCODING_NONE) {
7948 xmlSwitchEncoding(ctxt, enc);
7949 }
7950 }
7951
7952 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953 (IS_BLANK_CH(NXT(5)))) {
7954 xmlParseTextDecl(ctxt);
7955 }
7956 }
7957 }
7958 }
7959 ctxt->hasPErefs = 1;
7960 }
7961
7962 /**
7963 * xmlLoadEntityContent:
7964 * @ctxt: an XML parser context
7965 * @entity: an unloaded system entity
7966 *
7967 * Load the original content of the given system entity from the
7968 * ExternalID/SystemID given. This is to be used for Included in Literal
7969 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970 *
7971 * Returns 0 in case of success and -1 in case of failure
7972 */
7973 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7974 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975 xmlParserInputPtr input;
7976 xmlBufferPtr buf;
7977 int l, c;
7978 int count = 0;
7979
7980 if ((ctxt == NULL) || (entity == NULL) ||
7981 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 (entity->content != NULL)) {
7984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 "xmlLoadEntityContent parameter error");
7986 return(-1);
7987 }
7988
7989 if (xmlParserDebugEntities)
7990 xmlGenericError(xmlGenericErrorContext,
7991 "Reading %s entity content input\n", entity->name);
7992
7993 buf = xmlBufferCreate();
7994 if (buf == NULL) {
7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 "xmlLoadEntityContent parameter error");
7997 return(-1);
7998 }
7999
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (input == NULL) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent input error");
8004 xmlBufferFree(buf);
8005 return(-1);
8006 }
8007
8008 /*
8009 * Push the entity as the current input, read char by char
8010 * saving to the buffer until the end of the entity or an error
8011 */
8012 if (xmlPushInput(ctxt, input) < 0) {
8013 xmlBufferFree(buf);
8014 return(-1);
8015 }
8016
8017 GROW;
8018 c = CUR_CHAR(l);
8019 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020 (IS_CHAR(c))) {
8021 xmlBufferAdd(buf, ctxt->input->cur, l);
8022 if (count++ > XML_PARSER_CHUNK_SIZE) {
8023 count = 0;
8024 GROW;
8025 if (ctxt->instate == XML_PARSER_EOF) {
8026 xmlBufferFree(buf);
8027 return(-1);
8028 }
8029 }
8030 NEXTL(l);
8031 c = CUR_CHAR(l);
8032 if (c == 0) {
8033 count = 0;
8034 GROW;
8035 if (ctxt->instate == XML_PARSER_EOF) {
8036 xmlBufferFree(buf);
8037 return(-1);
8038 }
8039 c = CUR_CHAR(l);
8040 }
8041 }
8042
8043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044 xmlPopInput(ctxt);
8045 } else if (!IS_CHAR(c)) {
8046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047 "xmlLoadEntityContent: invalid char value %d\n",
8048 c);
8049 xmlBufferFree(buf);
8050 return(-1);
8051 }
8052 entity->content = buf->content;
8053 buf->content = NULL;
8054 xmlBufferFree(buf);
8055
8056 return(0);
8057 }
8058
8059 /**
8060 * xmlParseStringPEReference:
8061 * @ctxt: an XML parser context
8062 * @str: a pointer to an index in the string
8063 *
8064 * parse PEReference declarations
8065 *
8066 * [69] PEReference ::= '%' Name ';'
8067 *
8068 * [ WFC: No Recursion ]
8069 * A parsed entity must not contain a recursive
8070 * reference to itself, either directly or indirectly.
8071 *
8072 * [ WFC: Entity Declared ]
8073 * In a document without any DTD, a document with only an internal DTD
8074 * subset which contains no parameter entity references, or a document
8075 * with "standalone='yes'", ... ... The declaration of a parameter
8076 * entity must precede any reference to it...
8077 *
8078 * [ VC: Entity Declared ]
8079 * In a document with an external subset or external parameter entities
8080 * with "standalone='no'", ... ... The declaration of a parameter entity
8081 * must precede any reference to it...
8082 *
8083 * [ WFC: In DTD ]
8084 * Parameter-entity references may only appear in the DTD.
8085 * NOTE: misleading but this is handled.
8086 *
8087 * Returns the string of the entity content.
8088 * str is updated to the current value of the index
8089 */
8090 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8091 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092 const xmlChar *ptr;
8093 xmlChar cur;
8094 xmlChar *name;
8095 xmlEntityPtr entity = NULL;
8096
8097 if ((str == NULL) || (*str == NULL)) return(NULL);
8098 ptr = *str;
8099 cur = *ptr;
8100 if (cur != '%')
8101 return(NULL);
8102 ptr++;
8103 name = xmlParseStringName(ctxt, &ptr);
8104 if (name == NULL) {
8105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 "xmlParseStringPEReference: no name\n");
8107 *str = ptr;
8108 return(NULL);
8109 }
8110 cur = *ptr;
8111 if (cur != ';') {
8112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 xmlFree(name);
8114 *str = ptr;
8115 return(NULL);
8116 }
8117 ptr++;
8118
8119 /*
8120 * Increate the number of entity references parsed
8121 */
8122 ctxt->nbentities++;
8123
8124 /*
8125 * Request the entity from SAX
8126 */
8127 if ((ctxt->sax != NULL) &&
8128 (ctxt->sax->getParameterEntity != NULL))
8129 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlFree(name);
8132 *str = ptr;
8133 return(NULL);
8134 }
8135 if (entity == NULL) {
8136 /*
8137 * [ WFC: Entity Declared ]
8138 * In a document without any DTD, a document with only an
8139 * internal DTD subset which contains no parameter entity
8140 * references, or a document with "standalone='yes'", ...
8141 * ... The declaration of a parameter entity must precede
8142 * any reference to it...
8143 */
8144 if ((ctxt->standalone == 1) ||
8145 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 "PEReference: %%%s; not found\n", name);
8148 } else {
8149 /*
8150 * [ VC: Entity Declared ]
8151 * In a document with an external subset or external
8152 * parameter entities with "standalone='no'", ...
8153 * ... The declaration of a parameter entity must
8154 * precede any reference to it...
8155 */
8156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 "PEReference: %%%s; not found\n",
8158 name, NULL);
8159 ctxt->valid = 0;
8160 }
8161 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8162 } else {
8163 /*
8164 * Internal checking in case the entity quest barfed
8165 */
8166 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 "%%%s; is not a parameter entity\n",
8170 name, NULL);
8171 }
8172 }
8173 ctxt->hasPErefs = 1;
8174 xmlFree(name);
8175 *str = ptr;
8176 return(entity);
8177 }
8178
8179 /**
8180 * xmlParseDocTypeDecl:
8181 * @ctxt: an XML parser context
8182 *
8183 * parse a DOCTYPE declaration
8184 *
8185 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8186 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187 *
8188 * [ VC: Root Element Type ]
8189 * The Name in the document type declaration must match the element
8190 * type of the root element.
8191 */
8192
8193 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8194 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8195 const xmlChar *name = NULL;
8196 xmlChar *ExternalID = NULL;
8197 xmlChar *URI = NULL;
8198
8199 /*
8200 * We know that '<!DOCTYPE' has been detected.
8201 */
8202 SKIP(9);
8203
8204 SKIP_BLANKS;
8205
8206 /*
8207 * Parse the DOCTYPE name.
8208 */
8209 name = xmlParseName(ctxt);
8210 if (name == NULL) {
8211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8213 }
8214 ctxt->intSubName = name;
8215
8216 SKIP_BLANKS;
8217
8218 /*
8219 * Check for SystemID and ExternalID
8220 */
8221 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222
8223 if ((URI != NULL) || (ExternalID != NULL)) {
8224 ctxt->hasExternalSubset = 1;
8225 }
8226 ctxt->extSubURI = URI;
8227 ctxt->extSubSystem = ExternalID;
8228
8229 SKIP_BLANKS;
8230
8231 /*
8232 * Create and update the internal subset.
8233 */
8234 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 (!ctxt->disableSAX))
8236 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8237 if (ctxt->instate == XML_PARSER_EOF)
8238 return;
8239
8240 /*
8241 * Is there any internal subset declarations ?
8242 * they are handled separately in xmlParseInternalSubset()
8243 */
8244 if (RAW == '[')
8245 return;
8246
8247 /*
8248 * We should be at the end of the DOCTYPE declaration.
8249 */
8250 if (RAW != '>') {
8251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8252 }
8253 NEXT;
8254 }
8255
8256 /**
8257 * xmlParseInternalSubset:
8258 * @ctxt: an XML parser context
8259 *
8260 * parse the internal subset declaration
8261 *
8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263 */
8264
8265 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8266 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267 /*
8268 * Is there any DTD definition ?
8269 */
8270 if (RAW == '[') {
8271 int baseInputNr = ctxt->inputNr;
8272 ctxt->instate = XML_PARSER_DTD;
8273 NEXT;
8274 /*
8275 * Parse the succession of Markup declarations and
8276 * PEReferences.
8277 * Subsequence (markupdecl | PEReference | S)*
8278 */
8279 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8280 (ctxt->instate != XML_PARSER_EOF)) {
8281 const xmlChar *check = CUR_PTR;
8282 unsigned int cons = ctxt->input->consumed;
8283
8284 SKIP_BLANKS;
8285 xmlParseMarkupDecl(ctxt);
8286 xmlParsePEReference(ctxt);
8287
8288 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8290 "xmlParseInternalSubset: error detected in Markup declaration\n");
8291 if (ctxt->inputNr > baseInputNr)
8292 xmlPopInput(ctxt);
8293 else
8294 break;
8295 }
8296 }
8297 if (RAW == ']') {
8298 NEXT;
8299 SKIP_BLANKS;
8300 }
8301 }
8302
8303 /*
8304 * We should be at the end of the DOCTYPE declaration.
8305 */
8306 if (RAW != '>') {
8307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8308 return;
8309 }
8310 NEXT;
8311 }
8312
8313 #ifdef LIBXML_SAX1_ENABLED
8314 /**
8315 * xmlParseAttribute:
8316 * @ctxt: an XML parser context
8317 * @value: a xmlChar ** used to store the value of the attribute
8318 *
8319 * parse an attribute
8320 *
8321 * [41] Attribute ::= Name Eq AttValue
8322 *
8323 * [ WFC: No External Entity References ]
8324 * Attribute values cannot contain direct or indirect entity references
8325 * to external entities.
8326 *
8327 * [ WFC: No < in Attribute Values ]
8328 * The replacement text of any entity referred to directly or indirectly in
8329 * an attribute value (other than "<") must not contain a <.
8330 *
8331 * [ VC: Attribute Value Type ]
8332 * The attribute must have been declared; the value must be of the type
8333 * declared for it.
8334 *
8335 * [25] Eq ::= S? '=' S?
8336 *
8337 * With namespace:
8338 *
8339 * [NS 11] Attribute ::= QName Eq AttValue
8340 *
8341 * Also the case QName == xmlns:??? is handled independently as a namespace
8342 * definition.
8343 *
8344 * Returns the attribute name, and the value in *value.
8345 */
8346
8347 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8348 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8349 const xmlChar *name;
8350 xmlChar *val;
8351
8352 *value = NULL;
8353 GROW;
8354 name = xmlParseName(ctxt);
8355 if (name == NULL) {
8356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8357 "error parsing attribute name\n");
8358 return(NULL);
8359 }
8360
8361 /*
8362 * read the value
8363 */
8364 SKIP_BLANKS;
8365 if (RAW == '=') {
8366 NEXT;
8367 SKIP_BLANKS;
8368 val = xmlParseAttValue(ctxt);
8369 ctxt->instate = XML_PARSER_CONTENT;
8370 } else {
8371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8372 "Specification mandates value for attribute %s\n", name);
8373 return(NULL);
8374 }
8375
8376 /*
8377 * Check that xml:lang conforms to the specification
8378 * No more registered as an error, just generate a warning now
8379 * since this was deprecated in XML second edition
8380 */
8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 if (!xmlCheckLanguageID(val)) {
8383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 "Malformed value for xml:lang : %s\n",
8385 val, NULL);
8386 }
8387 }
8388
8389 /*
8390 * Check that xml:space conforms to the specification
8391 */
8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 if (xmlStrEqual(val, BAD_CAST "default"))
8394 *(ctxt->space) = 0;
8395 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 *(ctxt->space) = 1;
8397 else {
8398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8399 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8400 val, NULL);
8401 }
8402 }
8403
8404 *value = val;
8405 return(name);
8406 }
8407
8408 /**
8409 * xmlParseStartTag:
8410 * @ctxt: an XML parser context
8411 *
8412 * parse a start of tag either for rule element or
8413 * EmptyElement. In both case we don't parse the tag closing chars.
8414 *
8415 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
8419 * empty-element tag.
8420 *
8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422 *
8423 * [ WFC: Unique Att Spec ]
8424 * No attribute name may appear more than once in the same start-tag or
8425 * empty-element tag.
8426 *
8427 * With namespace:
8428 *
8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430 *
8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432 *
8433 * Returns the element name parsed
8434 */
8435
8436 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8437 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8438 const xmlChar *name;
8439 const xmlChar *attname;
8440 xmlChar *attvalue;
8441 const xmlChar **atts = ctxt->atts;
8442 int nbatts = 0;
8443 int maxatts = ctxt->maxatts;
8444 int i;
8445
8446 if (RAW != '<') return(NULL);
8447 NEXT1;
8448
8449 name = xmlParseName(ctxt);
8450 if (name == NULL) {
8451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8452 "xmlParseStartTag: invalid element name\n");
8453 return(NULL);
8454 }
8455
8456 /*
8457 * Now parse the attributes, it ends up with the ending
8458 *
8459 * (S Attribute)* S?
8460 */
8461 SKIP_BLANKS;
8462 GROW;
8463
8464 while (((RAW != '>') &&
8465 ((RAW != '/') || (NXT(1) != '>')) &&
8466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8467 const xmlChar *q = CUR_PTR;
8468 unsigned int cons = ctxt->input->consumed;
8469
8470 attname = xmlParseAttribute(ctxt, &attvalue);
8471 if ((attname != NULL) && (attvalue != NULL)) {
8472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
8475 * start-tag or empty-element tag.
8476 */
8477 for (i = 0; i < nbatts;i += 2) {
8478 if (xmlStrEqual(atts[i], attname)) {
8479 xmlErrAttributeDup(ctxt, NULL, attname);
8480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 }
8484 /*
8485 * Add the pair to atts
8486 */
8487 if (atts == NULL) {
8488 maxatts = 22; /* allow for 10 attrs by default */
8489 atts = (const xmlChar **)
8490 xmlMalloc(maxatts * sizeof(xmlChar *));
8491 if (atts == NULL) {
8492 xmlErrMemory(ctxt, NULL);
8493 if (attvalue != NULL)
8494 xmlFree(attvalue);
8495 goto failed;
8496 }
8497 ctxt->atts = atts;
8498 ctxt->maxatts = maxatts;
8499 } else if (nbatts + 4 > maxatts) {
8500 const xmlChar **n;
8501
8502 maxatts *= 2;
8503 n = (const xmlChar **) xmlRealloc((void *) atts,
8504 maxatts * sizeof(const xmlChar *));
8505 if (n == NULL) {
8506 xmlErrMemory(ctxt, NULL);
8507 if (attvalue != NULL)
8508 xmlFree(attvalue);
8509 goto failed;
8510 }
8511 atts = n;
8512 ctxt->atts = atts;
8513 ctxt->maxatts = maxatts;
8514 }
8515 atts[nbatts++] = attname;
8516 atts[nbatts++] = attvalue;
8517 atts[nbatts] = NULL;
8518 atts[nbatts + 1] = NULL;
8519 } else {
8520 if (attvalue != NULL)
8521 xmlFree(attvalue);
8522 }
8523
8524 failed:
8525
8526 GROW
8527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 break;
8529 if (SKIP_BLANKS == 0) {
8530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 "attributes construct error\n");
8532 }
8533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534 (attname == NULL) && (attvalue == NULL)) {
8535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 "xmlParseStartTag: problem parsing attributes\n");
8537 break;
8538 }
8539 SHRINK;
8540 GROW;
8541 }
8542
8543 /*
8544 * SAX: Start of Element !
8545 */
8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8547 (!ctxt->disableSAX)) {
8548 if (nbatts > 0)
8549 ctxt->sax->startElement(ctxt->userData, name, atts);
8550 else
8551 ctxt->sax->startElement(ctxt->userData, name, NULL);
8552 }
8553
8554 if (atts != NULL) {
8555 /* Free only the content strings */
8556 for (i = 1;i < nbatts;i+=2)
8557 if (atts[i] != NULL)
8558 xmlFree((xmlChar *) atts[i]);
8559 }
8560 return(name);
8561 }
8562
8563 /**
8564 * xmlParseEndTag1:
8565 * @ctxt: an XML parser context
8566 * @line: line of the start tag
8567 * @nsNr: number of namespaces on the start tag
8568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
8578 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8579 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8580 const xmlChar *name;
8581
8582 GROW;
8583 if ((RAW != '<') || (NXT(1) != '/')) {
8584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8585 "xmlParseEndTag: '</' not found\n");
8586 return;
8587 }
8588 SKIP(2);
8589
8590 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8591
8592 /*
8593 * We should definitely be at the ending "S? '>'" part
8594 */
8595 GROW;
8596 SKIP_BLANKS;
8597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8599 } else
8600 NEXT1;
8601
8602 /*
8603 * [ WFC: Element Type Match ]
8604 * The Name in an element's end-tag must match the element type in the
8605 * start-tag.
8606 *
8607 */
8608 if (name != (xmlChar*)1) {
8609 if (name == NULL) name = BAD_CAST "unparseable";
8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8611 "Opening and ending tag mismatch: %s line %d and %s\n",
8612 ctxt->name, line, name);
8613 }
8614
8615 /*
8616 * SAX: End of Tag
8617 */
8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 (!ctxt->disableSAX))
8620 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8621
8622 namePop(ctxt);
8623 spacePop(ctxt);
8624 return;
8625 }
8626
8627 /**
8628 * xmlParseEndTag:
8629 * @ctxt: an XML parser context
8630 *
8631 * parse an end of tag
8632 *
8633 * [42] ETag ::= '</' Name S? '>'
8634 *
8635 * With namespace
8636 *
8637 * [NS 9] ETag ::= '</' QName S? '>'
8638 */
8639
8640 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8641 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8642 xmlParseEndTag1(ctxt, 0);
8643 }
8644 #endif /* LIBXML_SAX1_ENABLED */
8645
8646 /************************************************************************
8647 * *
8648 * SAX 2 specific operations *
8649 * *
8650 ************************************************************************/
8651
8652 /*
8653 * xmlGetNamespace:
8654 * @ctxt: an XML parser context
8655 * @prefix: the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
8658 * The prefix must come from the @ctxt->dict dictionary
8659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8663 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664 int i;
8665
8666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8667 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8668 if (ctxt->nsTab[i] == prefix) {
8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 return(NULL);
8671 return(ctxt->nsTab[i + 1]);
8672 }
8673 return(NULL);
8674 }
8675
8676 /**
8677 * xmlParseQName:
8678 * @ctxt: an XML parser context
8679 * @prefix: pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6] QName ::= (Prefix ':')? LocalPart
8684 * [7] Prefix ::= NCName
8685 * [8] LocalPart ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8691 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692 const xmlChar *l, *p;
8693
8694 GROW;
8695
8696 l = xmlParseNCName(ctxt);
8697 if (l == NULL) {
8698 if (CUR == ':') {
8699 l = xmlParseName(ctxt);
8700 if (l != NULL) {
8701 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702 "Failed to parse QName '%s'\n", l, NULL, NULL);
8703 *prefix = NULL;
8704 return(l);
8705 }
8706 }
8707 return(NULL);
8708 }
8709 if (CUR == ':') {
8710 NEXT;
8711 p = l;
8712 l = xmlParseNCName(ctxt);
8713 if (l == NULL) {
8714 xmlChar *tmp;
8715
8716 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8718 l = xmlParseNmtoken(ctxt);
8719 if (l == NULL)
8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 else {
8722 tmp = xmlBuildQName(l, p, NULL, 0);
8723 xmlFree((char *)l);
8724 }
8725 p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 if (tmp != NULL) xmlFree(tmp);
8727 *prefix = NULL;
8728 return(p);
8729 }
8730 if (CUR == ':') {
8731 xmlChar *tmp;
8732
8733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8735 NEXT;
8736 tmp = (xmlChar *) xmlParseName(ctxt);
8737 if (tmp != NULL) {
8738 tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 if (tmp != NULL) xmlFree(tmp);
8741 *prefix = p;
8742 return(l);
8743 }
8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 if (tmp != NULL) xmlFree(tmp);
8747 *prefix = p;
8748 return(l);
8749 }
8750 *prefix = p;
8751 } else
8752 *prefix = NULL;
8753 return(l);
8754 }
8755
8756 /**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt: an XML parser context
8759 * @name: the localname
8760 * @prefix: the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8770 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771 xmlChar const *prefix) {
8772 const xmlChar *cmp;
8773 const xmlChar *in;
8774 const xmlChar *ret;
8775 const xmlChar *prefix2;
8776
8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779 GROW;
8780 in = ctxt->input->cur;
8781
8782 cmp = prefix;
8783 while (*in != 0 && *in == *cmp) {
8784 ++in;
8785 ++cmp;
8786 }
8787 if ((*cmp == 0) && (*in == ':')) {
8788 in++;
8789 cmp = name;
8790 while (*in != 0 && *in == *cmp) {
8791 ++in;
8792 ++cmp;
8793 }
8794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8795 /* success */
8796 ctxt->input->cur = in;
8797 return((const xmlChar*) 1);
8798 }
8799 }
8800 /*
8801 * all strings coms from the dictionary, equality can be done directly
8802 */
8803 ret = xmlParseQName (ctxt, &prefix2);
8804 if ((ret == name) && (prefix == prefix2))
8805 return((const xmlChar*) 1);
8806 return ret;
8807 }
8808
8809 /**
8810 * xmlParseAttValueInternal:
8811 * @ctxt: an XML parser context
8812 * @len: attribute len result
8813 * @alloc: whether the attribute was reallocated as a new string
8814 * @normalize: if 1 then further non-CDATA normalization must be done
8815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 * directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
8822 * checked for validity, the XML processor must normalize it as follows:
8823 * - a character reference is processed by appending the referenced
8824 * character to the attribute value
8825 * - an entity reference is processed by recursively processing the
8826 * replacement text of the entity
8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 * appending #x20 to the normalized value, except that only a single
8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8830 * parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
8832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
8835 * (#x20) characters by a single space (#x20) character.
8836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 * caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8844 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845 int normalize)
8846 {
8847 xmlChar limit = 0;
8848 const xmlChar *in = NULL, *start, *end, *last;
8849 xmlChar *ret = NULL;
8850 int line, col;
8851
8852 GROW;
8853 in = (xmlChar *) CUR_PTR;
8854 line = ctxt->input->line;
8855 col = ctxt->input->col;
8856 if (*in != '"' && *in != '\'') {
8857 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8858 return (NULL);
8859 }
8860 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8861
8862 /*
8863 * try to handle in this routine the most common case where no
8864 * allocation of a new string is required and where content is
8865 * pure ASCII.
8866 */
8867 limit = *in++;
8868 col++;
8869 end = ctxt->input->end;
8870 start = in;
8871 if (in >= end) {
8872 const xmlChar *oldbase = ctxt->input->base;
8873 GROW;
8874 if (oldbase != ctxt->input->base) {
8875 long delta = ctxt->input->base - oldbase;
8876 start = start + delta;
8877 in = in + delta;
8878 }
8879 end = ctxt->input->end;
8880 }
8881 if (normalize) {
8882 /*
8883 * Skip any leading spaces
8884 */
8885 while ((in < end) && (*in != limit) &&
8886 ((*in == 0x20) || (*in == 0x9) ||
8887 (*in == 0xA) || (*in == 0xD))) {
8888 if (*in == 0xA) {
8889 line++; col = 1;
8890 } else {
8891 col++;
8892 }
8893 in++;
8894 start = in;
8895 if (in >= end) {
8896 const xmlChar *oldbase = ctxt->input->base;
8897 GROW;
8898 if (ctxt->instate == XML_PARSER_EOF)
8899 return(NULL);
8900 if (oldbase != ctxt->input->base) {
8901 long delta = ctxt->input->base - oldbase;
8902 start = start + delta;
8903 in = in + delta;
8904 }
8905 end = ctxt->input->end;
8906 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8909 "AttValue length too long\n");
8910 return(NULL);
8911 }
8912 }
8913 }
8914 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8916 col++;
8917 if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 if (in >= end) {
8919 const xmlChar *oldbase = ctxt->input->base;
8920 GROW;
8921 if (ctxt->instate == XML_PARSER_EOF)
8922 return(NULL);
8923 if (oldbase != ctxt->input->base) {
8924 long delta = ctxt->input->base - oldbase;
8925 start = start + delta;
8926 in = in + delta;
8927 }
8928 end = ctxt->input->end;
8929 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8932 "AttValue length too long\n");
8933 return(NULL);
8934 }
8935 }
8936 }
8937 last = in;
8938 /*
8939 * skip the trailing blanks
8940 */
8941 while ((last[-1] == 0x20) && (last > start)) last--;
8942 while ((in < end) && (*in != limit) &&
8943 ((*in == 0x20) || (*in == 0x9) ||
8944 (*in == 0xA) || (*in == 0xD))) {
8945 if (*in == 0xA) {
8946 line++, col = 1;
8947 } else {
8948 col++;
8949 }
8950 in++;
8951 if (in >= end) {
8952 const xmlChar *oldbase = ctxt->input->base;
8953 GROW;
8954 if (ctxt->instate == XML_PARSER_EOF)
8955 return(NULL);
8956 if (oldbase != ctxt->input->base) {
8957 long delta = ctxt->input->base - oldbase;
8958 start = start + delta;
8959 in = in + delta;
8960 last = last + delta;
8961 }
8962 end = ctxt->input->end;
8963 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8966 "AttValue length too long\n");
8967 return(NULL);
8968 }
8969 }
8970 }
8971 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8974 "AttValue length too long\n");
8975 return(NULL);
8976 }
8977 if (*in != limit) goto need_complex;
8978 } else {
8979 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 in++;
8982 col++;
8983 if (in >= end) {
8984 const xmlChar *oldbase = ctxt->input->base;
8985 GROW;
8986 if (ctxt->instate == XML_PARSER_EOF)
8987 return(NULL);
8988 if (oldbase != ctxt->input->base) {
8989 long delta = ctxt->input->base - oldbase;
8990 start = start + delta;
8991 in = in + delta;
8992 }
8993 end = ctxt->input->end;
8994 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8997 "AttValue length too long\n");
8998 return(NULL);
8999 }
9000 }
9001 }
9002 last = in;
9003 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9006 "AttValue length too long\n");
9007 return(NULL);
9008 }
9009 if (*in != limit) goto need_complex;
9010 }
9011 in++;
9012 col++;
9013 if (len != NULL) {
9014 *len = last - start;
9015 ret = (xmlChar *) start;
9016 } else {
9017 if (alloc) *alloc = 1;
9018 ret = xmlStrndup(start, last - start);
9019 }
9020 CUR_PTR = in;
9021 ctxt->input->line = line;
9022 ctxt->input->col = col;
9023 if (alloc) *alloc = 0;
9024 return ret;
9025 need_complex:
9026 if (alloc) *alloc = 1;
9027 return xmlParseAttValueComplex(ctxt, len, normalize);
9028 }
9029
9030 /**
9031 * xmlParseAttribute2:
9032 * @ctxt: an XML parser context
9033 * @pref: the element prefix
9034 * @elem: the element name
9035 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9036 * @value: a xmlChar ** used to store the value of the attribute
9037 * @len: an int * to save the length of the attribute
9038 * @alloc: an int * to indicate if the attribute was allocated
9039 *
9040 * parse an attribute in the new SAX2 framework.
9041 *
9042 * Returns the attribute name, and the value in *value, .
9043 */
9044
9045 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)9046 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9047 const xmlChar * pref, const xmlChar * elem,
9048 const xmlChar ** prefix, xmlChar ** value,
9049 int *len, int *alloc)
9050 {
9051 const xmlChar *name;
9052 xmlChar *val, *internal_val = NULL;
9053 int normalize = 0;
9054
9055 *value = NULL;
9056 GROW;
9057 name = xmlParseQName(ctxt, prefix);
9058 if (name == NULL) {
9059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060 "error parsing attribute name\n");
9061 return (NULL);
9062 }
9063
9064 /*
9065 * get the type if needed
9066 */
9067 if (ctxt->attsSpecial != NULL) {
9068 int type;
9069
9070 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071 pref, elem, *prefix, name);
9072 if (type != 0)
9073 normalize = 1;
9074 }
9075
9076 /*
9077 * read the value
9078 */
9079 SKIP_BLANKS;
9080 if (RAW == '=') {
9081 NEXT;
9082 SKIP_BLANKS;
9083 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 if (normalize) {
9085 /*
9086 * Sometimes a second normalisation pass for spaces is needed
9087 * but that only happens if charrefs or entities refernces
9088 * have been used in the attribute value, i.e. the attribute
9089 * value have been extracted in an allocated string already.
9090 */
9091 if (*alloc) {
9092 const xmlChar *val2;
9093
9094 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9095 if ((val2 != NULL) && (val2 != val)) {
9096 xmlFree(val);
9097 val = (xmlChar *) val2;
9098 }
9099 }
9100 }
9101 ctxt->instate = XML_PARSER_CONTENT;
9102 } else {
9103 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9104 "Specification mandates value for attribute %s\n",
9105 name);
9106 return (NULL);
9107 }
9108
9109 if (*prefix == ctxt->str_xml) {
9110 /*
9111 * Check that xml:lang conforms to the specification
9112 * No more registered as an error, just generate a warning now
9113 * since this was deprecated in XML second edition
9114 */
9115 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116 internal_val = xmlStrndup(val, *len);
9117 if (!xmlCheckLanguageID(internal_val)) {
9118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119 "Malformed value for xml:lang : %s\n",
9120 internal_val, NULL);
9121 }
9122 }
9123
9124 /*
9125 * Check that xml:space conforms to the specification
9126 */
9127 if (xmlStrEqual(name, BAD_CAST "space")) {
9128 internal_val = xmlStrndup(val, *len);
9129 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130 *(ctxt->space) = 0;
9131 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132 *(ctxt->space) = 1;
9133 else {
9134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136 internal_val, NULL);
9137 }
9138 }
9139 if (internal_val) {
9140 xmlFree(internal_val);
9141 }
9142 }
9143
9144 *value = val;
9145 return (name);
9146 }
9147 /**
9148 * xmlParseStartTag2:
9149 * @ctxt: an XML parser context
9150 *
9151 * parse a start of tag either for rule element or
9152 * EmptyElement. In both case we don't parse the tag closing chars.
9153 * This routine is called when running SAX2 parsing
9154 *
9155 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156 *
9157 * [ WFC: Unique Att Spec ]
9158 * No attribute name may appear more than once in the same start-tag or
9159 * empty-element tag.
9160 *
9161 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162 *
9163 * [ WFC: Unique Att Spec ]
9164 * No attribute name may appear more than once in the same start-tag or
9165 * empty-element tag.
9166 *
9167 * With namespace:
9168 *
9169 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170 *
9171 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172 *
9173 * Returns the element name parsed
9174 */
9175
9176 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)9177 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9178 const xmlChar **URI, int *tlen) {
9179 const xmlChar *localname;
9180 const xmlChar *prefix;
9181 const xmlChar *attname;
9182 const xmlChar *aprefix;
9183 const xmlChar *nsname;
9184 xmlChar *attvalue;
9185 const xmlChar **atts = ctxt->atts;
9186 int maxatts = ctxt->maxatts;
9187 int nratts, nbatts, nbdef, inputid;
9188 int i, j, nbNs, attval;
9189 unsigned long cur;
9190 int nsNr = ctxt->nsNr;
9191
9192 if (RAW != '<') return(NULL);
9193 NEXT1;
9194
9195 /*
9196 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197 * point since the attribute values may be stored as pointers to
9198 * the buffer and calling SHRINK would destroy them !
9199 * The Shrinking is only possible once the full set of attribute
9200 * callbacks have been done.
9201 */
9202 SHRINK;
9203 cur = ctxt->input->cur - ctxt->input->base;
9204 inputid = ctxt->input->id;
9205 nbatts = 0;
9206 nratts = 0;
9207 nbdef = 0;
9208 nbNs = 0;
9209 attval = 0;
9210 /* Forget any namespaces added during an earlier parse of this element. */
9211 ctxt->nsNr = nsNr;
9212
9213 localname = xmlParseQName(ctxt, &prefix);
9214 if (localname == NULL) {
9215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 "StartTag: invalid element name\n");
9217 return(NULL);
9218 }
9219 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9220
9221 /*
9222 * Now parse the attributes, it ends up with the ending
9223 *
9224 * (S Attribute)* S?
9225 */
9226 SKIP_BLANKS;
9227 GROW;
9228
9229 while (((RAW != '>') &&
9230 ((RAW != '/') || (NXT(1) != '>')) &&
9231 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9232 const xmlChar *q = CUR_PTR;
9233 unsigned int cons = ctxt->input->consumed;
9234 int len = -1, alloc = 0;
9235
9236 attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 &aprefix, &attvalue, &len, &alloc);
9238 if ((attname == NULL) || (attvalue == NULL))
9239 goto next_attr;
9240 if (len < 0) len = xmlStrlen(attvalue);
9241
9242 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244 xmlURIPtr uri;
9245
9246 if (URL == NULL) {
9247 xmlErrMemory(ctxt, "dictionary allocation failure");
9248 if ((attvalue != NULL) && (alloc != 0))
9249 xmlFree(attvalue);
9250 return(NULL);
9251 }
9252 if (*URL != 0) {
9253 uri = xmlParseURI((const char *) URL);
9254 if (uri == NULL) {
9255 xmlNsErr(ctxt, XML_WAR_NS_URI,
9256 "xmlns: '%s' is not a valid URI\n",
9257 URL, NULL, NULL);
9258 } else {
9259 if (uri->scheme == NULL) {
9260 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261 "xmlns: URI %s is not absolute\n",
9262 URL, NULL, NULL);
9263 }
9264 xmlFreeURI(uri);
9265 }
9266 if (URL == ctxt->str_xml_ns) {
9267 if (attname != ctxt->str_xml) {
9268 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269 "xml namespace URI cannot be the default namespace\n",
9270 NULL, NULL, NULL);
9271 }
9272 goto next_attr;
9273 }
9274 if ((len == 29) &&
9275 (xmlStrEqual(URL,
9276 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278 "reuse of the xmlns namespace name is forbidden\n",
9279 NULL, NULL, NULL);
9280 goto next_attr;
9281 }
9282 }
9283 /*
9284 * check that it's not a defined namespace
9285 */
9286 for (j = 1;j <= nbNs;j++)
9287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288 break;
9289 if (j <= nbNs)
9290 xmlErrAttributeDup(ctxt, NULL, attname);
9291 else
9292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9293
9294 } else if (aprefix == ctxt->str_xmlns) {
9295 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296 xmlURIPtr uri;
9297
9298 if (attname == ctxt->str_xml) {
9299 if (URL != ctxt->str_xml_ns) {
9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301 "xml namespace prefix mapped to wrong URI\n",
9302 NULL, NULL, NULL);
9303 }
9304 /*
9305 * Do not keep a namespace definition node
9306 */
9307 goto next_attr;
9308 }
9309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI mapped to wrong prefix\n",
9313 NULL, NULL, NULL);
9314 }
9315 goto next_attr;
9316 }
9317 if (attname == ctxt->str_xmlns) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "redefinition of the xmlns prefix is forbidden\n",
9320 NULL, NULL, NULL);
9321 goto next_attr;
9322 }
9323 if ((len == 29) &&
9324 (xmlStrEqual(URL,
9325 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327 "reuse of the xmlns namespace name is forbidden\n",
9328 NULL, NULL, NULL);
9329 goto next_attr;
9330 }
9331 if ((URL == NULL) || (URL[0] == 0)) {
9332 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333 "xmlns:%s: Empty XML namespace is not allowed\n",
9334 attname, NULL, NULL);
9335 goto next_attr;
9336 } else {
9337 uri = xmlParseURI((const char *) URL);
9338 if (uri == NULL) {
9339 xmlNsErr(ctxt, XML_WAR_NS_URI,
9340 "xmlns:%s: '%s' is not a valid URI\n",
9341 attname, URL, NULL);
9342 } else {
9343 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345 "xmlns:%s: URI %s is not absolute\n",
9346 attname, URL, NULL);
9347 }
9348 xmlFreeURI(uri);
9349 }
9350 }
9351
9352 /*
9353 * check that it's not a defined namespace
9354 */
9355 for (j = 1;j <= nbNs;j++)
9356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357 break;
9358 if (j <= nbNs)
9359 xmlErrAttributeDup(ctxt, aprefix, attname);
9360 else
9361 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362
9363 } else {
9364 /*
9365 * Add the pair to atts
9366 */
9367 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369 goto next_attr;
9370 }
9371 maxatts = ctxt->maxatts;
9372 atts = ctxt->atts;
9373 }
9374 ctxt->attallocs[nratts++] = alloc;
9375 atts[nbatts++] = attname;
9376 atts[nbatts++] = aprefix;
9377 /*
9378 * The namespace URI field is used temporarily to point at the
9379 * base of the current input buffer for non-alloced attributes.
9380 * When the input buffer is reallocated, all the pointers become
9381 * invalid, but they can be reconstructed later.
9382 */
9383 if (alloc)
9384 atts[nbatts++] = NULL;
9385 else
9386 atts[nbatts++] = ctxt->input->base;
9387 atts[nbatts++] = attvalue;
9388 attvalue += len;
9389 atts[nbatts++] = attvalue;
9390 /*
9391 * tag if some deallocation is needed
9392 */
9393 if (alloc != 0) attval = 1;
9394 attvalue = NULL; /* moved into atts */
9395 }
9396
9397 next_attr:
9398 if ((attvalue != NULL) && (alloc != 0)) {
9399 xmlFree(attvalue);
9400 attvalue = NULL;
9401 }
9402
9403 GROW
9404 if (ctxt->instate == XML_PARSER_EOF)
9405 break;
9406 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 break;
9408 if (SKIP_BLANKS == 0) {
9409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 "attributes construct error\n");
9411 break;
9412 }
9413 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414 (attname == NULL) && (attvalue == NULL)) {
9415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9416 "xmlParseStartTag: problem parsing attributes\n");
9417 break;
9418 }
9419 GROW;
9420 }
9421
9422 if (ctxt->input->id != inputid) {
9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424 "Unexpected change of input\n");
9425 localname = NULL;
9426 goto done;
9427 }
9428
9429 /* Reconstruct attribute value pointers. */
9430 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431 if (atts[i+2] != NULL) {
9432 /*
9433 * Arithmetic on dangling pointers is technically undefined
9434 * behavior, but well...
9435 */
9436 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9438 atts[i+3] += offset; /* value */
9439 atts[i+4] += offset; /* valuend */
9440 }
9441 }
9442
9443 /*
9444 * The attributes defaulting
9445 */
9446 if (ctxt->attsDefault != NULL) {
9447 xmlDefAttrsPtr defaults;
9448
9449 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 if (defaults != NULL) {
9451 for (i = 0;i < defaults->nbAttrs;i++) {
9452 attname = defaults->values[5 * i];
9453 aprefix = defaults->values[5 * i + 1];
9454
9455 /*
9456 * special work for namespaces defaulted defs
9457 */
9458 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 /*
9460 * check that it's not a defined namespace
9461 */
9462 for (j = 1;j <= nbNs;j++)
9463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 break;
9465 if (j <= nbNs) continue;
9466
9467 nsname = xmlGetNamespace(ctxt, NULL);
9468 if (nsname != defaults->values[5 * i + 2]) {
9469 if (nsPush(ctxt, NULL,
9470 defaults->values[5 * i + 2]) > 0)
9471 nbNs++;
9472 }
9473 } else if (aprefix == ctxt->str_xmlns) {
9474 /*
9475 * check that it's not a defined namespace
9476 */
9477 for (j = 1;j <= nbNs;j++)
9478 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 break;
9480 if (j <= nbNs) continue;
9481
9482 nsname = xmlGetNamespace(ctxt, attname);
9483 if (nsname != defaults->values[2]) {
9484 if (nsPush(ctxt, attname,
9485 defaults->values[5 * i + 2]) > 0)
9486 nbNs++;
9487 }
9488 } else {
9489 /*
9490 * check that it's not a defined attribute
9491 */
9492 for (j = 0;j < nbatts;j+=5) {
9493 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 break;
9495 }
9496 if (j < nbatts) continue;
9497
9498 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9500 return(NULL);
9501 }
9502 maxatts = ctxt->maxatts;
9503 atts = ctxt->atts;
9504 }
9505 atts[nbatts++] = attname;
9506 atts[nbatts++] = aprefix;
9507 if (aprefix == NULL)
9508 atts[nbatts++] = NULL;
9509 else
9510 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9511 atts[nbatts++] = defaults->values[5 * i + 2];
9512 atts[nbatts++] = defaults->values[5 * i + 3];
9513 if ((ctxt->standalone == 1) &&
9514 (defaults->values[5 * i + 4] != NULL)) {
9515 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9516 "standalone: attribute %s on %s defaulted from external subset\n",
9517 attname, localname);
9518 }
9519 nbdef++;
9520 }
9521 }
9522 }
9523 }
9524
9525 /*
9526 * The attributes checkings
9527 */
9528 for (i = 0; i < nbatts;i += 5) {
9529 /*
9530 * The default namespace does not apply to attribute names.
9531 */
9532 if (atts[i + 1] != NULL) {
9533 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 if (nsname == NULL) {
9535 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 "Namespace prefix %s for %s on %s is not defined\n",
9537 atts[i + 1], atts[i], localname);
9538 }
9539 atts[i + 2] = nsname;
9540 } else
9541 nsname = NULL;
9542 /*
9543 * [ WFC: Unique Att Spec ]
9544 * No attribute name may appear more than once in the same
9545 * start-tag or empty-element tag.
9546 * As extended by the Namespace in XML REC.
9547 */
9548 for (j = 0; j < i;j += 5) {
9549 if (atts[i] == atts[j]) {
9550 if (atts[i+1] == atts[j+1]) {
9551 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 break;
9553 }
9554 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 "Namespaced Attribute %s in '%s' redefined\n",
9557 atts[i], nsname, NULL);
9558 break;
9559 }
9560 }
9561 }
9562 }
9563
9564 nsname = xmlGetNamespace(ctxt, prefix);
9565 if ((prefix != NULL) && (nsname == NULL)) {
9566 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 "Namespace prefix %s on %s is not defined\n",
9568 prefix, localname, NULL);
9569 }
9570 *pref = prefix;
9571 *URI = nsname;
9572
9573 /*
9574 * SAX: Start of Element !
9575 */
9576 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 (!ctxt->disableSAX)) {
9578 if (nbNs > 0)
9579 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 nbatts / 5, nbdef, atts);
9582 else
9583 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585 }
9586
9587 done:
9588 /*
9589 * Free up attribute allocated strings if needed
9590 */
9591 if (attval != 0) {
9592 for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 xmlFree((xmlChar *) atts[i]);
9595 }
9596
9597 return(localname);
9598 }
9599
9600 /**
9601 * xmlParseEndTag2:
9602 * @ctxt: an XML parser context
9603 * @line: line of the start tag
9604 * @nsNr: number of namespaces on the start tag
9605 *
9606 * parse an end of tag
9607 *
9608 * [42] ETag ::= '</' Name S? '>'
9609 *
9610 * With namespace
9611 *
9612 * [NS 9] ETag ::= '</' QName S? '>'
9613 */
9614
9615 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9616 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9617 const xmlChar *URI, int line, int nsNr, int tlen) {
9618 const xmlChar *name;
9619 size_t curLength;
9620
9621 GROW;
9622 if ((RAW != '<') || (NXT(1) != '/')) {
9623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9624 return;
9625 }
9626 SKIP(2);
9627
9628 curLength = ctxt->input->end - ctxt->input->cur;
9629 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631 if ((curLength >= (size_t)(tlen + 1)) &&
9632 (ctxt->input->cur[tlen] == '>')) {
9633 ctxt->input->cur += tlen + 1;
9634 ctxt->input->col += tlen + 1;
9635 goto done;
9636 }
9637 ctxt->input->cur += tlen;
9638 ctxt->input->col += tlen;
9639 name = (xmlChar*)1;
9640 } else {
9641 if (prefix == NULL)
9642 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 else
9644 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645 }
9646
9647 /*
9648 * We should definitely be at the ending "S? '>'" part
9649 */
9650 GROW;
9651 if (ctxt->instate == XML_PARSER_EOF)
9652 return;
9653 SKIP_BLANKS;
9654 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9655 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9656 } else
9657 NEXT1;
9658
9659 /*
9660 * [ WFC: Element Type Match ]
9661 * The Name in an element's end-tag must match the element type in the
9662 * start-tag.
9663 *
9664 */
9665 if (name != (xmlChar*)1) {
9666 if (name == NULL) name = BAD_CAST "unparseable";
9667 if ((line == 0) && (ctxt->node != NULL))
9668 line = ctxt->node->line;
9669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9670 "Opening and ending tag mismatch: %s line %d and %s\n",
9671 ctxt->name, line, name);
9672 }
9673
9674 /*
9675 * SAX: End of Tag
9676 */
9677 done:
9678 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 (!ctxt->disableSAX))
9680 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681
9682 spacePop(ctxt);
9683 if (nsNr != 0)
9684 nsPop(ctxt, nsNr);
9685 return;
9686 }
9687
9688 /**
9689 * xmlParseCDSect:
9690 * @ctxt: an XML parser context
9691 *
9692 * Parse escaped pure raw content.
9693 *
9694 * [18] CDSect ::= CDStart CData CDEnd
9695 *
9696 * [19] CDStart ::= '<![CDATA['
9697 *
9698 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699 *
9700 * [21] CDEnd ::= ']]>'
9701 */
9702 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9703 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704 xmlChar *buf = NULL;
9705 int len = 0;
9706 int size = XML_PARSER_BUFFER_SIZE;
9707 int r, rl;
9708 int s, sl;
9709 int cur, l;
9710 int count = 0;
9711
9712 /* Check 2.6.0 was NXT(0) not RAW */
9713 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9714 SKIP(9);
9715 } else
9716 return;
9717
9718 ctxt->instate = XML_PARSER_CDATA_SECTION;
9719 r = CUR_CHAR(rl);
9720 if (!IS_CHAR(r)) {
9721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9722 ctxt->instate = XML_PARSER_CONTENT;
9723 return;
9724 }
9725 NEXTL(rl);
9726 s = CUR_CHAR(sl);
9727 if (!IS_CHAR(s)) {
9728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9729 ctxt->instate = XML_PARSER_CONTENT;
9730 return;
9731 }
9732 NEXTL(sl);
9733 cur = CUR_CHAR(l);
9734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9735 if (buf == NULL) {
9736 xmlErrMemory(ctxt, NULL);
9737 return;
9738 }
9739 while (IS_CHAR(cur) &&
9740 ((r != ']') || (s != ']') || (cur != '>'))) {
9741 if (len + 5 >= size) {
9742 xmlChar *tmp;
9743
9744 if ((size > XML_MAX_TEXT_LENGTH) &&
9745 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747 "CData section too big found", NULL);
9748 xmlFree (buf);
9749 return;
9750 }
9751 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9752 if (tmp == NULL) {
9753 xmlFree(buf);
9754 xmlErrMemory(ctxt, NULL);
9755 return;
9756 }
9757 buf = tmp;
9758 size *= 2;
9759 }
9760 COPY_BUF(rl,buf,len,r);
9761 r = s;
9762 rl = sl;
9763 s = cur;
9764 sl = l;
9765 count++;
9766 if (count > 50) {
9767 GROW;
9768 if (ctxt->instate == XML_PARSER_EOF) {
9769 xmlFree(buf);
9770 return;
9771 }
9772 count = 0;
9773 }
9774 NEXTL(l);
9775 cur = CUR_CHAR(l);
9776 }
9777 buf[len] = 0;
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 if (cur != '>') {
9780 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9781 "CData section not finished\n%.50s\n", buf);
9782 xmlFree(buf);
9783 return;
9784 }
9785 NEXTL(l);
9786
9787 /*
9788 * OK the buffer is to be consumed as cdata.
9789 */
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 if (ctxt->sax->cdataBlock != NULL)
9792 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9793 else if (ctxt->sax->characters != NULL)
9794 ctxt->sax->characters(ctxt->userData, buf, len);
9795 }
9796 xmlFree(buf);
9797 }
9798
9799 /**
9800 * xmlParseContent:
9801 * @ctxt: an XML parser context
9802 *
9803 * Parse a content:
9804 *
9805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806 */
9807
9808 void
xmlParseContent(xmlParserCtxtPtr ctxt)9809 xmlParseContent(xmlParserCtxtPtr ctxt) {
9810 GROW;
9811 while ((RAW != 0) &&
9812 ((RAW != '<') || (NXT(1) != '/')) &&
9813 (ctxt->instate != XML_PARSER_EOF)) {
9814 const xmlChar *test = CUR_PTR;
9815 unsigned int cons = ctxt->input->consumed;
9816 const xmlChar *cur = ctxt->input->cur;
9817
9818 /*
9819 * First case : a Processing Instruction.
9820 */
9821 if ((*cur == '<') && (cur[1] == '?')) {
9822 xmlParsePI(ctxt);
9823 }
9824
9825 /*
9826 * Second case : a CDSection
9827 */
9828 /* 2.6.0 test was *cur not RAW */
9829 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9830 xmlParseCDSect(ctxt);
9831 }
9832
9833 /*
9834 * Third case : a comment
9835 */
9836 else if ((*cur == '<') && (NXT(1) == '!') &&
9837 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 xmlParseComment(ctxt);
9839 ctxt->instate = XML_PARSER_CONTENT;
9840 }
9841
9842 /*
9843 * Fourth case : a sub-element.
9844 */
9845 else if (*cur == '<') {
9846 xmlParseElement(ctxt);
9847 }
9848
9849 /*
9850 * Fifth case : a reference. If if has not been resolved,
9851 * parsing returns it's Name, create the node
9852 */
9853
9854 else if (*cur == '&') {
9855 xmlParseReference(ctxt);
9856 }
9857
9858 /*
9859 * Last case, text. Note that References are handled directly.
9860 */
9861 else {
9862 xmlParseCharData(ctxt, 0);
9863 }
9864
9865 GROW;
9866 SHRINK;
9867
9868 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9869 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 "detected an error in element content\n");
9871 xmlHaltParser(ctxt);
9872 break;
9873 }
9874 }
9875 }
9876
9877 /**
9878 * xmlParseElement:
9879 * @ctxt: an XML parser context
9880 *
9881 * parse an XML element, this is highly recursive
9882 *
9883 * [39] element ::= EmptyElemTag | STag content ETag
9884 *
9885 * [ WFC: Element Type Match ]
9886 * The Name in an element's end-tag must match the element type in the
9887 * start-tag.
9888 *
9889 */
9890
9891 void
xmlParseElement(xmlParserCtxtPtr ctxt)9892 xmlParseElement(xmlParserCtxtPtr ctxt) {
9893 const xmlChar *name;
9894 const xmlChar *prefix = NULL;
9895 const xmlChar *URI = NULL;
9896 xmlParserNodeInfo node_info;
9897 int line, tlen = 0;
9898 xmlNodePtr ret;
9899 int nsNr = ctxt->nsNr;
9900
9901 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 xmlParserMaxDepth);
9906 xmlHaltParser(ctxt);
9907 return;
9908 }
9909
9910 /* Capture start position */
9911 if (ctxt->record_info) {
9912 node_info.begin_pos = ctxt->input->consumed +
9913 (CUR_PTR - ctxt->input->base);
9914 node_info.begin_line = ctxt->input->line;
9915 }
9916
9917 if (ctxt->spaceNr == 0)
9918 spacePush(ctxt, -1);
9919 else if (*ctxt->space == -2)
9920 spacePush(ctxt, -1);
9921 else
9922 spacePush(ctxt, *ctxt->space);
9923
9924 line = ctxt->input->line;
9925 #ifdef LIBXML_SAX1_ENABLED
9926 if (ctxt->sax2)
9927 #endif /* LIBXML_SAX1_ENABLED */
9928 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9929 #ifdef LIBXML_SAX1_ENABLED
9930 else
9931 name = xmlParseStartTag(ctxt);
9932 #endif /* LIBXML_SAX1_ENABLED */
9933 if (ctxt->instate == XML_PARSER_EOF)
9934 return;
9935 if (name == NULL) {
9936 spacePop(ctxt);
9937 return;
9938 }
9939 namePush(ctxt, name);
9940 ret = ctxt->node;
9941
9942 #ifdef LIBXML_VALID_ENABLED
9943 /*
9944 * [ VC: Root Element Type ]
9945 * The Name in the document type declaration must match the element
9946 * type of the root element.
9947 */
9948 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9951 #endif /* LIBXML_VALID_ENABLED */
9952
9953 /*
9954 * Check for an Empty Element.
9955 */
9956 if ((RAW == '/') && (NXT(1) == '>')) {
9957 SKIP(2);
9958 if (ctxt->sax2) {
9959 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 (!ctxt->disableSAX))
9961 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9962 #ifdef LIBXML_SAX1_ENABLED
9963 } else {
9964 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 (!ctxt->disableSAX))
9966 ctxt->sax->endElement(ctxt->userData, name);
9967 #endif /* LIBXML_SAX1_ENABLED */
9968 }
9969 namePop(ctxt);
9970 spacePop(ctxt);
9971 if (nsNr != ctxt->nsNr)
9972 nsPop(ctxt, ctxt->nsNr - nsNr);
9973 if ( ret != NULL && ctxt->record_info ) {
9974 node_info.end_pos = ctxt->input->consumed +
9975 (CUR_PTR - ctxt->input->base);
9976 node_info.end_line = ctxt->input->line;
9977 node_info.node = ret;
9978 xmlParserAddNodeInfo(ctxt, &node_info);
9979 }
9980 return;
9981 }
9982 if (RAW == '>') {
9983 NEXT1;
9984 } else {
9985 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 "Couldn't find end of Start Tag %s line %d\n",
9987 name, line, NULL);
9988
9989 /*
9990 * end of parsing of this node.
9991 */
9992 nodePop(ctxt);
9993 namePop(ctxt);
9994 spacePop(ctxt);
9995 if (nsNr != ctxt->nsNr)
9996 nsPop(ctxt, ctxt->nsNr - nsNr);
9997
9998 /*
9999 * Capture end position and add node
10000 */
10001 if ( ret != NULL && ctxt->record_info ) {
10002 node_info.end_pos = ctxt->input->consumed +
10003 (CUR_PTR - ctxt->input->base);
10004 node_info.end_line = ctxt->input->line;
10005 node_info.node = ret;
10006 xmlParserAddNodeInfo(ctxt, &node_info);
10007 }
10008 return;
10009 }
10010
10011 /*
10012 * Parse the content of the element:
10013 */
10014 xmlParseContent(ctxt);
10015 if (ctxt->instate == XML_PARSER_EOF)
10016 return;
10017 if (!IS_BYTE_CHAR(RAW)) {
10018 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10019 "Premature end of data in tag %s line %d\n",
10020 name, line, NULL);
10021
10022 /*
10023 * end of parsing of this node.
10024 */
10025 nodePop(ctxt);
10026 namePop(ctxt);
10027 spacePop(ctxt);
10028 if (nsNr != ctxt->nsNr)
10029 nsPop(ctxt, ctxt->nsNr - nsNr);
10030 return;
10031 }
10032
10033 /*
10034 * parse the end of tag: '</' should be here.
10035 */
10036 if (ctxt->sax2) {
10037 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10038 namePop(ctxt);
10039 }
10040 #ifdef LIBXML_SAX1_ENABLED
10041 else
10042 xmlParseEndTag1(ctxt, line);
10043 #endif /* LIBXML_SAX1_ENABLED */
10044
10045 /*
10046 * Capture end position and add node
10047 */
10048 if ( ret != NULL && ctxt->record_info ) {
10049 node_info.end_pos = ctxt->input->consumed +
10050 (CUR_PTR - ctxt->input->base);
10051 node_info.end_line = ctxt->input->line;
10052 node_info.node = ret;
10053 xmlParserAddNodeInfo(ctxt, &node_info);
10054 }
10055 }
10056
10057 /**
10058 * xmlParseVersionNum:
10059 * @ctxt: an XML parser context
10060 *
10061 * parse the XML version value.
10062 *
10063 * [26] VersionNum ::= '1.' [0-9]+
10064 *
10065 * In practice allow [0-9].[0-9]+ at that level
10066 *
10067 * Returns the string giving the XML version number, or NULL
10068 */
10069 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10070 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071 xmlChar *buf = NULL;
10072 int len = 0;
10073 int size = 10;
10074 xmlChar cur;
10075
10076 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10077 if (buf == NULL) {
10078 xmlErrMemory(ctxt, NULL);
10079 return(NULL);
10080 }
10081 cur = CUR;
10082 if (!((cur >= '0') && (cur <= '9'))) {
10083 xmlFree(buf);
10084 return(NULL);
10085 }
10086 buf[len++] = cur;
10087 NEXT;
10088 cur=CUR;
10089 if (cur != '.') {
10090 xmlFree(buf);
10091 return(NULL);
10092 }
10093 buf[len++] = cur;
10094 NEXT;
10095 cur=CUR;
10096 while ((cur >= '0') && (cur <= '9')) {
10097 if (len + 1 >= size) {
10098 xmlChar *tmp;
10099
10100 size *= 2;
10101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 if (tmp == NULL) {
10103 xmlFree(buf);
10104 xmlErrMemory(ctxt, NULL);
10105 return(NULL);
10106 }
10107 buf = tmp;
10108 }
10109 buf[len++] = cur;
10110 NEXT;
10111 cur=CUR;
10112 }
10113 buf[len] = 0;
10114 return(buf);
10115 }
10116
10117 /**
10118 * xmlParseVersionInfo:
10119 * @ctxt: an XML parser context
10120 *
10121 * parse the XML version.
10122 *
10123 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10124 *
10125 * [25] Eq ::= S? '=' S?
10126 *
10127 * Returns the version string, e.g. "1.0"
10128 */
10129
10130 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10131 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132 xmlChar *version = NULL;
10133
10134 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10135 SKIP(7);
10136 SKIP_BLANKS;
10137 if (RAW != '=') {
10138 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10139 return(NULL);
10140 }
10141 NEXT;
10142 SKIP_BLANKS;
10143 if (RAW == '"') {
10144 NEXT;
10145 version = xmlParseVersionNum(ctxt);
10146 if (RAW != '"') {
10147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10148 } else
10149 NEXT;
10150 } else if (RAW == '\''){
10151 NEXT;
10152 version = xmlParseVersionNum(ctxt);
10153 if (RAW != '\'') {
10154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10155 } else
10156 NEXT;
10157 } else {
10158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10159 }
10160 }
10161 return(version);
10162 }
10163
10164 /**
10165 * xmlParseEncName:
10166 * @ctxt: an XML parser context
10167 *
10168 * parse the XML encoding name
10169 *
10170 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171 *
10172 * Returns the encoding name value or NULL
10173 */
10174 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10175 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176 xmlChar *buf = NULL;
10177 int len = 0;
10178 int size = 10;
10179 xmlChar cur;
10180
10181 cur = CUR;
10182 if (((cur >= 'a') && (cur <= 'z')) ||
10183 ((cur >= 'A') && (cur <= 'Z'))) {
10184 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10185 if (buf == NULL) {
10186 xmlErrMemory(ctxt, NULL);
10187 return(NULL);
10188 }
10189
10190 buf[len++] = cur;
10191 NEXT;
10192 cur = CUR;
10193 while (((cur >= 'a') && (cur <= 'z')) ||
10194 ((cur >= 'A') && (cur <= 'Z')) ||
10195 ((cur >= '0') && (cur <= '9')) ||
10196 (cur == '.') || (cur == '_') ||
10197 (cur == '-')) {
10198 if (len + 1 >= size) {
10199 xmlChar *tmp;
10200
10201 size *= 2;
10202 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 if (tmp == NULL) {
10204 xmlErrMemory(ctxt, NULL);
10205 xmlFree(buf);
10206 return(NULL);
10207 }
10208 buf = tmp;
10209 }
10210 buf[len++] = cur;
10211 NEXT;
10212 cur = CUR;
10213 if (cur == 0) {
10214 SHRINK;
10215 GROW;
10216 cur = CUR;
10217 }
10218 }
10219 buf[len] = 0;
10220 } else {
10221 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10222 }
10223 return(buf);
10224 }
10225
10226 /**
10227 * xmlParseEncodingDecl:
10228 * @ctxt: an XML parser context
10229 *
10230 * parse the XML encoding declaration
10231 *
10232 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10233 *
10234 * this setups the conversion filters.
10235 *
10236 * Returns the encoding value or NULL
10237 */
10238
10239 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10240 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241 xmlChar *encoding = NULL;
10242
10243 SKIP_BLANKS;
10244 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10245 SKIP(8);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
10248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
10255 encoding = xmlParseEncName(ctxt);
10256 if (RAW != '"') {
10257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10258 xmlFree((xmlChar *) encoding);
10259 return(NULL);
10260 } else
10261 NEXT;
10262 } else if (RAW == '\''){
10263 NEXT;
10264 encoding = xmlParseEncName(ctxt);
10265 if (RAW != '\'') {
10266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10267 xmlFree((xmlChar *) encoding);
10268 return(NULL);
10269 } else
10270 NEXT;
10271 } else {
10272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10273 }
10274
10275 /*
10276 * Non standard parsing, allowing the user to ignore encoding
10277 */
10278 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 xmlFree((xmlChar *) encoding);
10280 return(NULL);
10281 }
10282
10283 /*
10284 * UTF-16 encoding stwich has already taken place at this stage,
10285 * more over the little-endian/big-endian selection is already done
10286 */
10287 if ((encoding != NULL) &&
10288 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10290 /*
10291 * If no encoding was passed to the parser, that we are
10292 * using UTF-16 and no decoder is present i.e. the
10293 * document is apparently UTF-8 compatible, then raise an
10294 * encoding mismatch fatal error
10295 */
10296 if ((ctxt->encoding == NULL) &&
10297 (ctxt->input->buf != NULL) &&
10298 (ctxt->input->buf->encoder == NULL)) {
10299 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 "Document labelled UTF-16 but has UTF-8 content\n");
10301 }
10302 if (ctxt->encoding != NULL)
10303 xmlFree((xmlChar *) ctxt->encoding);
10304 ctxt->encoding = encoding;
10305 }
10306 /*
10307 * UTF-8 encoding is handled natively
10308 */
10309 else if ((encoding != NULL) &&
10310 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10312 if (ctxt->encoding != NULL)
10313 xmlFree((xmlChar *) ctxt->encoding);
10314 ctxt->encoding = encoding;
10315 }
10316 else if (encoding != NULL) {
10317 xmlCharEncodingHandlerPtr handler;
10318
10319 if (ctxt->input->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->input->encoding);
10321 ctxt->input->encoding = encoding;
10322
10323 handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 if (handler != NULL) {
10325 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 /* failed to convert */
10327 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 return(NULL);
10329 }
10330 } else {
10331 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10332 "Unsupported encoding %s\n", encoding);
10333 return(NULL);
10334 }
10335 }
10336 }
10337 return(encoding);
10338 }
10339
10340 /**
10341 * xmlParseSDDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * parse the XML standalone declaration
10345 *
10346 * [32] SDDecl ::= S 'standalone' Eq
10347 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10348 *
10349 * [ VC: Standalone Document Declaration ]
10350 * TODO The standalone document declaration must have the value "no"
10351 * if any external markup declarations contain declarations of:
10352 * - attributes with default values, if elements to which these
10353 * attributes apply appear in the document without specifications
10354 * of values for these attributes, or
10355 * - entities (other than amp, lt, gt, apos, quot), if references
10356 * to those entities appear in the document, or
10357 * - attributes with values subject to normalization, where the
10358 * attribute appears in the document with a value which will change
10359 * as a result of normalization, or
10360 * - element types with element content, if white space occurs directly
10361 * within any instance of those types.
10362 *
10363 * Returns:
10364 * 1 if standalone="yes"
10365 * 0 if standalone="no"
10366 * -2 if standalone attribute is missing or invalid
10367 * (A standalone value of -2 means that the XML declaration was found,
10368 * but no value was specified for the standalone attribute).
10369 */
10370
10371 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10372 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10373 int standalone = -2;
10374
10375 SKIP_BLANKS;
10376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10377 SKIP(10);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10381 return(standalone);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '\''){
10386 NEXT;
10387 if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 standalone = 0;
10389 SKIP(2);
10390 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 (NXT(2) == 's')) {
10392 standalone = 1;
10393 SKIP(3);
10394 } else {
10395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10396 }
10397 if (RAW != '\'') {
10398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10399 } else
10400 NEXT;
10401 } else if (RAW == '"'){
10402 NEXT;
10403 if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 standalone = 0;
10405 SKIP(2);
10406 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 (NXT(2) == 's')) {
10408 standalone = 1;
10409 SKIP(3);
10410 } else {
10411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10412 }
10413 if (RAW != '"') {
10414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10415 } else
10416 NEXT;
10417 } else {
10418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10419 }
10420 }
10421 return(standalone);
10422 }
10423
10424 /**
10425 * xmlParseXMLDecl:
10426 * @ctxt: an XML parser context
10427 *
10428 * parse an XML declaration header
10429 *
10430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431 */
10432
10433 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10434 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435 xmlChar *version;
10436
10437 /*
10438 * This value for standalone indicates that the document has an
10439 * XML declaration but it does not have a standalone attribute.
10440 * It will be overwritten later if a standalone attribute is found.
10441 */
10442 ctxt->input->standalone = -2;
10443
10444 /*
10445 * We know that '<?xml' is here.
10446 */
10447 SKIP(5);
10448
10449 if (!IS_BLANK_CH(RAW)) {
10450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 "Blank needed after '<?xml'\n");
10452 }
10453 SKIP_BLANKS;
10454
10455 /*
10456 * We must have the VersionInfo here.
10457 */
10458 version = xmlParseVersionInfo(ctxt);
10459 if (version == NULL) {
10460 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10461 } else {
10462 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 /*
10464 * Changed here for XML-1.0 5th edition
10465 */
10466 if (ctxt->options & XML_PARSE_OLD10) {
10467 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 "Unsupported version '%s'\n",
10469 version);
10470 } else {
10471 if ((version[0] == '1') && ((version[1] == '.'))) {
10472 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 "Unsupported version '%s'\n",
10474 version, NULL);
10475 } else {
10476 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 "Unsupported version '%s'\n",
10478 version);
10479 }
10480 }
10481 }
10482 if (ctxt->version != NULL)
10483 xmlFree((void *) ctxt->version);
10484 ctxt->version = version;
10485 }
10486
10487 /*
10488 * We may have the encoding declaration
10489 */
10490 if (!IS_BLANK_CH(RAW)) {
10491 if ((RAW == '?') && (NXT(1) == '>')) {
10492 SKIP(2);
10493 return;
10494 }
10495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10496 }
10497 xmlParseEncodingDecl(ctxt);
10498 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499 (ctxt->instate == XML_PARSER_EOF)) {
10500 /*
10501 * The XML REC instructs us to stop parsing right here
10502 */
10503 return;
10504 }
10505
10506 /*
10507 * We may have the standalone status.
10508 */
10509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10510 if ((RAW == '?') && (NXT(1) == '>')) {
10511 SKIP(2);
10512 return;
10513 }
10514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10515 }
10516
10517 /*
10518 * We can grow the input buffer freely at that point
10519 */
10520 GROW;
10521
10522 SKIP_BLANKS;
10523 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524
10525 SKIP_BLANKS;
10526 if ((RAW == '?') && (NXT(1) == '>')) {
10527 SKIP(2);
10528 } else if (RAW == '>') {
10529 /* Deprecated old WD ... */
10530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10531 NEXT;
10532 } else {
10533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10534 MOVETO_ENDTAG(CUR_PTR);
10535 NEXT;
10536 }
10537 }
10538
10539 /**
10540 * xmlParseMisc:
10541 * @ctxt: an XML parser context
10542 *
10543 * parse an XML Misc* optional field.
10544 *
10545 * [27] Misc ::= Comment | PI | S
10546 */
10547
10548 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10549 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10550 while ((ctxt->instate != XML_PARSER_EOF) &&
10551 (((RAW == '<') && (NXT(1) == '?')) ||
10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553 IS_BLANK_CH(CUR))) {
10554 if ((RAW == '<') && (NXT(1) == '?')) {
10555 xmlParsePI(ctxt);
10556 } else if (IS_BLANK_CH(CUR)) {
10557 NEXT;
10558 } else
10559 xmlParseComment(ctxt);
10560 }
10561 }
10562
10563 /**
10564 * xmlParseDocument:
10565 * @ctxt: an XML parser context
10566 *
10567 * parse an XML document (and build a tree if using the standard SAX
10568 * interface).
10569 *
10570 * [1] document ::= prolog element Misc*
10571 *
10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573 *
10574 * Returns 0, -1 in case of error. the parser context is augmented
10575 * as a result of the parsing.
10576 */
10577
10578 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10579 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580 xmlChar start[4];
10581 xmlCharEncoding enc;
10582
10583 xmlInitParser();
10584
10585 if ((ctxt == NULL) || (ctxt->input == NULL))
10586 return(-1);
10587
10588 GROW;
10589
10590 /*
10591 * SAX: detecting the level.
10592 */
10593 xmlDetectSAX2(ctxt);
10594
10595 /*
10596 * SAX: beginning of the document processing.
10597 */
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10600 if (ctxt->instate == XML_PARSER_EOF)
10601 return(-1);
10602
10603 if ((ctxt->encoding == NULL) &&
10604 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10605 /*
10606 * Get the 4 first bytes and decode the charset
10607 * if enc != XML_CHAR_ENCODING_NONE
10608 * plug some encoding conversion routines.
10609 */
10610 start[0] = RAW;
10611 start[1] = NXT(1);
10612 start[2] = NXT(2);
10613 start[3] = NXT(3);
10614 enc = xmlDetectCharEncoding(&start[0], 4);
10615 if (enc != XML_CHAR_ENCODING_NONE) {
10616 xmlSwitchEncoding(ctxt, enc);
10617 }
10618 }
10619
10620
10621 if (CUR == 0) {
10622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10623 return(-1);
10624 }
10625
10626 /*
10627 * Check for the XMLDecl in the Prolog.
10628 * do not GROW here to avoid the detected encoder to decode more
10629 * than just the first line, unless the amount of data is really
10630 * too small to hold "<?xml version="1.0" encoding="foo"
10631 */
10632 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633 GROW;
10634 }
10635 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10636
10637 /*
10638 * Note that we will switch encoding on the fly.
10639 */
10640 xmlParseXMLDecl(ctxt);
10641 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 (ctxt->instate == XML_PARSER_EOF)) {
10643 /*
10644 * The XML REC instructs us to stop parsing right here
10645 */
10646 return(-1);
10647 }
10648 ctxt->standalone = ctxt->input->standalone;
10649 SKIP_BLANKS;
10650 } else {
10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652 }
10653 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654 ctxt->sax->startDocument(ctxt->userData);
10655 if (ctxt->instate == XML_PARSER_EOF)
10656 return(-1);
10657 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660 }
10661
10662 /*
10663 * The Misc part of the Prolog
10664 */
10665 GROW;
10666 xmlParseMisc(ctxt);
10667
10668 /*
10669 * Then possibly doc type declaration(s) and more Misc
10670 * (doctypedecl Misc*)?
10671 */
10672 GROW;
10673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10674
10675 ctxt->inSubset = 1;
10676 xmlParseDocTypeDecl(ctxt);
10677 if (RAW == '[') {
10678 ctxt->instate = XML_PARSER_DTD;
10679 xmlParseInternalSubset(ctxt);
10680 if (ctxt->instate == XML_PARSER_EOF)
10681 return(-1);
10682 }
10683
10684 /*
10685 * Create and update the external subset.
10686 */
10687 ctxt->inSubset = 2;
10688 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 (!ctxt->disableSAX))
10690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 ctxt->extSubSystem, ctxt->extSubURI);
10692 if (ctxt->instate == XML_PARSER_EOF)
10693 return(-1);
10694 ctxt->inSubset = 0;
10695
10696 xmlCleanSpecialAttr(ctxt);
10697
10698 ctxt->instate = XML_PARSER_PROLOG;
10699 xmlParseMisc(ctxt);
10700 }
10701
10702 /*
10703 * Time to start parsing the tree itself
10704 */
10705 GROW;
10706 if (RAW != '<') {
10707 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 "Start tag expected, '<' not found\n");
10709 } else {
10710 ctxt->instate = XML_PARSER_CONTENT;
10711 xmlParseElement(ctxt);
10712 ctxt->instate = XML_PARSER_EPILOG;
10713
10714
10715 /*
10716 * The Misc part at the end
10717 */
10718 xmlParseMisc(ctxt);
10719
10720 if (RAW != 0) {
10721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10722 }
10723 ctxt->instate = XML_PARSER_EOF;
10724 }
10725
10726 /*
10727 * SAX: end of the document processing.
10728 */
10729 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10730 ctxt->sax->endDocument(ctxt->userData);
10731
10732 /*
10733 * Remove locally kept entity definitions if the tree was not built
10734 */
10735 if ((ctxt->myDoc != NULL) &&
10736 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 xmlFreeDoc(ctxt->myDoc);
10738 ctxt->myDoc = NULL;
10739 }
10740
10741 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 if (ctxt->valid)
10744 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 if (ctxt->nsWellFormed)
10746 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 if (ctxt->options & XML_PARSE_OLD10)
10748 ctxt->myDoc->properties |= XML_DOC_OLD10;
10749 }
10750 if (! ctxt->wellFormed) {
10751 ctxt->valid = 0;
10752 return(-1);
10753 }
10754 return(0);
10755 }
10756
10757 /**
10758 * xmlParseExtParsedEnt:
10759 * @ctxt: an XML parser context
10760 *
10761 * parse a general parsed entity
10762 * An external general parsed entity is well-formed if it matches the
10763 * production labeled extParsedEnt.
10764 *
10765 * [78] extParsedEnt ::= TextDecl? content
10766 *
10767 * Returns 0, -1 in case of error. the parser context is augmented
10768 * as a result of the parsing.
10769 */
10770
10771 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10772 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773 xmlChar start[4];
10774 xmlCharEncoding enc;
10775
10776 if ((ctxt == NULL) || (ctxt->input == NULL))
10777 return(-1);
10778
10779 xmlDefaultSAXHandlerInit();
10780
10781 xmlDetectSAX2(ctxt);
10782
10783 GROW;
10784
10785 /*
10786 * SAX: beginning of the document processing.
10787 */
10788 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790
10791 /*
10792 * Get the 4 first bytes and decode the charset
10793 * if enc != XML_CHAR_ENCODING_NONE
10794 * plug some encoding conversion routines.
10795 */
10796 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 start[0] = RAW;
10798 start[1] = NXT(1);
10799 start[2] = NXT(2);
10800 start[3] = NXT(3);
10801 enc = xmlDetectCharEncoding(start, 4);
10802 if (enc != XML_CHAR_ENCODING_NONE) {
10803 xmlSwitchEncoding(ctxt, enc);
10804 }
10805 }
10806
10807
10808 if (CUR == 0) {
10809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10810 }
10811
10812 /*
10813 * Check for the XMLDecl in the Prolog.
10814 */
10815 GROW;
10816 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10817
10818 /*
10819 * Note that we will switch encoding on the fly.
10820 */
10821 xmlParseXMLDecl(ctxt);
10822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 /*
10824 * The XML REC instructs us to stop parsing right here
10825 */
10826 return(-1);
10827 }
10828 SKIP_BLANKS;
10829 } else {
10830 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831 }
10832 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833 ctxt->sax->startDocument(ctxt->userData);
10834 if (ctxt->instate == XML_PARSER_EOF)
10835 return(-1);
10836
10837 /*
10838 * Doing validity checking on chunk doesn't make sense
10839 */
10840 ctxt->instate = XML_PARSER_CONTENT;
10841 ctxt->validate = 0;
10842 ctxt->loadsubset = 0;
10843 ctxt->depth = 0;
10844
10845 xmlParseContent(ctxt);
10846 if (ctxt->instate == XML_PARSER_EOF)
10847 return(-1);
10848
10849 if ((RAW == '<') && (NXT(1) == '/')) {
10850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10851 } else if (RAW != 0) {
10852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10853 }
10854
10855 /*
10856 * SAX: end of the document processing.
10857 */
10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 ctxt->sax->endDocument(ctxt->userData);
10860
10861 if (! ctxt->wellFormed) return(-1);
10862 return(0);
10863 }
10864
10865 #ifdef LIBXML_PUSH_ENABLED
10866 /************************************************************************
10867 * *
10868 * Progressive parsing interfaces *
10869 * *
10870 ************************************************************************/
10871
10872 /**
10873 * xmlParseLookupSequence:
10874 * @ctxt: an XML parser context
10875 * @first: the first char to lookup
10876 * @next: the next char to lookup or zero
10877 * @third: the next char to lookup or zero
10878 *
10879 * Try to find if a sequence (first, next, third) or just (first next) or
10880 * (first) is available in the input stream.
10881 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882 * to avoid rescanning sequences of bytes, it DOES change the state of the
10883 * parser, do not use liberally.
10884 *
10885 * Returns the index to the current parsing point if the full sequence
10886 * is available, -1 otherwise.
10887 */
10888 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10889 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890 xmlChar next, xmlChar third) {
10891 int base, len;
10892 xmlParserInputPtr in;
10893 const xmlChar *buf;
10894
10895 in = ctxt->input;
10896 if (in == NULL) return(-1);
10897 base = in->cur - in->base;
10898 if (base < 0) return(-1);
10899 if (ctxt->checkIndex > base)
10900 base = ctxt->checkIndex;
10901 if (in->buf == NULL) {
10902 buf = in->base;
10903 len = in->length;
10904 } else {
10905 buf = xmlBufContent(in->buf->buffer);
10906 len = xmlBufUse(in->buf->buffer);
10907 }
10908 /* take into account the sequence length */
10909 if (third) len -= 2;
10910 else if (next) len --;
10911 for (;base < len;base++) {
10912 if (buf[base] == first) {
10913 if (third != 0) {
10914 if ((buf[base + 1] != next) ||
10915 (buf[base + 2] != third)) continue;
10916 } else if (next != 0) {
10917 if (buf[base + 1] != next) continue;
10918 }
10919 ctxt->checkIndex = 0;
10920 #ifdef DEBUG_PUSH
10921 if (next == 0)
10922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c' found at %d\n",
10924 first, base);
10925 else if (third == 0)
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: lookup '%c%c' found at %d\n",
10928 first, next, base);
10929 else
10930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: lookup '%c%c%c' found at %d\n",
10932 first, next, third, base);
10933 #endif
10934 return(base - (in->cur - in->base));
10935 }
10936 }
10937 ctxt->checkIndex = base;
10938 #ifdef DEBUG_PUSH
10939 if (next == 0)
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: lookup '%c' failed\n", first);
10942 else if (third == 0)
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c%c' failed\n", first, next);
10945 else
10946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c%c' failed\n", first, next, third);
10948 #endif
10949 return(-1);
10950 }
10951
10952 /**
10953 * xmlParseGetLasts:
10954 * @ctxt: an XML parser context
10955 * @lastlt: pointer to store the last '<' from the input
10956 * @lastgt: pointer to store the last '>' from the input
10957 *
10958 * Lookup the last < and > in the current chunk
10959 */
10960 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10961 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962 const xmlChar **lastgt) {
10963 const xmlChar *tmp;
10964
10965 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 xmlGenericError(xmlGenericErrorContext,
10967 "Internal error: xmlParseGetLasts\n");
10968 return;
10969 }
10970 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10971 tmp = ctxt->input->end;
10972 tmp--;
10973 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10974 if (tmp < ctxt->input->base) {
10975 *lastlt = NULL;
10976 *lastgt = NULL;
10977 } else {
10978 *lastlt = tmp;
10979 tmp++;
10980 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 if (*tmp == '\'') {
10982 tmp++;
10983 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 if (tmp < ctxt->input->end) tmp++;
10985 } else if (*tmp == '"') {
10986 tmp++;
10987 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 if (tmp < ctxt->input->end) tmp++;
10989 } else
10990 tmp++;
10991 }
10992 if (tmp < ctxt->input->end)
10993 *lastgt = tmp;
10994 else {
10995 tmp = *lastlt;
10996 tmp--;
10997 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 if (tmp >= ctxt->input->base)
10999 *lastgt = tmp;
11000 else
11001 *lastgt = NULL;
11002 }
11003 }
11004 } else {
11005 *lastlt = NULL;
11006 *lastgt = NULL;
11007 }
11008 }
11009 /**
11010 * xmlCheckCdataPush:
11011 * @cur: pointer to the block of characters
11012 * @len: length of the block in bytes
11013 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11014 *
11015 * Check that the block of characters is okay as SCdata content [20]
11016 *
11017 * Returns the number of bytes to pass if okay, a negative index where an
11018 * UTF-8 error occurred otherwise
11019 */
11020 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11021 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11022 int ix;
11023 unsigned char c;
11024 int codepoint;
11025
11026 if ((utf == NULL) || (len <= 0))
11027 return(0);
11028
11029 for (ix = 0; ix < len;) { /* string is 0-terminated */
11030 c = utf[ix];
11031 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11032 if (c >= 0x20)
11033 ix++;
11034 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 ix++;
11036 else
11037 return(-ix);
11038 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11039 if (ix + 2 > len) return(complete ? -ix : ix);
11040 if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 return(-ix);
11042 codepoint = (utf[ix] & 0x1f) << 6;
11043 codepoint |= utf[ix+1] & 0x3f;
11044 if (!xmlIsCharQ(codepoint))
11045 return(-ix);
11046 ix += 2;
11047 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11048 if (ix + 3 > len) return(complete ? -ix : ix);
11049 if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 ((utf[ix+2] & 0xc0) != 0x80))
11051 return(-ix);
11052 codepoint = (utf[ix] & 0xf) << 12;
11053 codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 codepoint |= utf[ix+2] & 0x3f;
11055 if (!xmlIsCharQ(codepoint))
11056 return(-ix);
11057 ix += 3;
11058 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11059 if (ix + 4 > len) return(complete ? -ix : ix);
11060 if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 ((utf[ix+2] & 0xc0) != 0x80) ||
11062 ((utf[ix+3] & 0xc0) != 0x80))
11063 return(-ix);
11064 codepoint = (utf[ix] & 0x7) << 18;
11065 codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 codepoint |= utf[ix+3] & 0x3f;
11068 if (!xmlIsCharQ(codepoint))
11069 return(-ix);
11070 ix += 4;
11071 } else /* unknown encoding */
11072 return(-ix);
11073 }
11074 return(ix);
11075 }
11076
11077 /**
11078 * xmlParseTryOrFinish:
11079 * @ctxt: an XML parser context
11080 * @terminate: last chunk indicator
11081 *
11082 * Try to progress on parsing
11083 *
11084 * Returns zero if no parsing was possible
11085 */
11086 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11087 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088 int ret = 0;
11089 int avail, tlen;
11090 xmlChar cur, next;
11091 const xmlChar *lastlt, *lastgt;
11092
11093 if (ctxt->input == NULL)
11094 return(0);
11095
11096 #ifdef DEBUG_PUSH
11097 switch (ctxt->instate) {
11098 case XML_PARSER_EOF:
11099 xmlGenericError(xmlGenericErrorContext,
11100 "PP: try EOF\n"); break;
11101 case XML_PARSER_START:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try START\n"); break;
11104 case XML_PARSER_MISC:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try MISC\n");break;
11107 case XML_PARSER_COMMENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try COMMENT\n");break;
11110 case XML_PARSER_PROLOG:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try PROLOG\n");break;
11113 case XML_PARSER_START_TAG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try START_TAG\n");break;
11116 case XML_PARSER_CONTENT:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try CONTENT\n");break;
11119 case XML_PARSER_CDATA_SECTION:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CDATA_SECTION\n");break;
11122 case XML_PARSER_END_TAG:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try END_TAG\n");break;
11125 case XML_PARSER_ENTITY_DECL:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try ENTITY_DECL\n");break;
11128 case XML_PARSER_ENTITY_VALUE:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_VALUE\n");break;
11131 case XML_PARSER_ATTRIBUTE_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ATTRIBUTE_VALUE\n");break;
11134 case XML_PARSER_DTD:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try DTD\n");break;
11137 case XML_PARSER_EPILOG:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try EPILOG\n");break;
11140 case XML_PARSER_PI:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try PI\n");break;
11143 case XML_PARSER_IGNORE:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try IGNORE\n");break;
11146 }
11147 #endif
11148
11149 if ((ctxt->input != NULL) &&
11150 (ctxt->input->cur - ctxt->input->base > 4096)) {
11151 xmlSHRINK(ctxt);
11152 ctxt->checkIndex = 0;
11153 }
11154 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11155
11156 while (ctxt->instate != XML_PARSER_EOF) {
11157 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11158 return(0);
11159
11160 if (ctxt->input == NULL) break;
11161 if (ctxt->input->buf == NULL)
11162 avail = ctxt->input->length -
11163 (ctxt->input->cur - ctxt->input->base);
11164 else {
11165 /*
11166 * If we are operating on converted input, try to flush
11167 * remainng chars to avoid them stalling in the non-converted
11168 * buffer. But do not do this in document start where
11169 * encoding="..." may not have been read and we work on a
11170 * guessed encoding.
11171 */
11172 if ((ctxt->instate != XML_PARSER_START) &&
11173 (ctxt->input->buf->raw != NULL) &&
11174 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11175 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176 ctxt->input);
11177 size_t current = ctxt->input->cur - ctxt->input->base;
11178
11179 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181 base, current);
11182 }
11183 avail = xmlBufUse(ctxt->input->buf->buffer) -
11184 (ctxt->input->cur - ctxt->input->base);
11185 }
11186 if (avail < 1)
11187 goto done;
11188 switch (ctxt->instate) {
11189 case XML_PARSER_EOF:
11190 /*
11191 * Document parsing is done !
11192 */
11193 goto done;
11194 case XML_PARSER_START:
11195 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 xmlChar start[4];
11197 xmlCharEncoding enc;
11198
11199 /*
11200 * Very first chars read from the document flow.
11201 */
11202 if (avail < 4)
11203 goto done;
11204
11205 /*
11206 * Get the 4 first bytes and decode the charset
11207 * if enc != XML_CHAR_ENCODING_NONE
11208 * plug some encoding conversion routines,
11209 * else xmlSwitchEncoding will set to (default)
11210 * UTF8.
11211 */
11212 start[0] = RAW;
11213 start[1] = NXT(1);
11214 start[2] = NXT(2);
11215 start[3] = NXT(3);
11216 enc = xmlDetectCharEncoding(start, 4);
11217 xmlSwitchEncoding(ctxt, enc);
11218 break;
11219 }
11220
11221 if (avail < 2)
11222 goto done;
11223 cur = ctxt->input->cur[0];
11224 next = ctxt->input->cur[1];
11225 if (cur == 0) {
11226 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 ctxt->sax->setDocumentLocator(ctxt->userData,
11228 &xmlDefaultSAXLocator);
11229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230 xmlHaltParser(ctxt);
11231 #ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering EOF\n");
11234 #endif
11235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 ctxt->sax->endDocument(ctxt->userData);
11237 goto done;
11238 }
11239 if ((cur == '<') && (next == '?')) {
11240 /* PI or XML decl */
11241 if (avail < 5) return(ret);
11242 if ((!terminate) &&
11243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 return(ret);
11245 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 ctxt->sax->setDocumentLocator(ctxt->userData,
11247 &xmlDefaultSAXLocator);
11248 if ((ctxt->input->cur[2] == 'x') &&
11249 (ctxt->input->cur[3] == 'm') &&
11250 (ctxt->input->cur[4] == 'l') &&
11251 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11252 ret += 5;
11253 #ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing XML Decl\n");
11256 #endif
11257 xmlParseXMLDecl(ctxt);
11258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 /*
11260 * The XML REC instructs us to stop parsing right
11261 * here
11262 */
11263 xmlHaltParser(ctxt);
11264 return(0);
11265 }
11266 ctxt->standalone = ctxt->input->standalone;
11267 if ((ctxt->encoding == NULL) &&
11268 (ctxt->input->encoding != NULL))
11269 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 (!ctxt->disableSAX))
11272 ctxt->sax->startDocument(ctxt->userData);
11273 ctxt->instate = XML_PARSER_MISC;
11274 #ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: entering MISC\n");
11277 #endif
11278 } else {
11279 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 (!ctxt->disableSAX))
11282 ctxt->sax->startDocument(ctxt->userData);
11283 ctxt->instate = XML_PARSER_MISC;
11284 #ifdef DEBUG_PUSH
11285 xmlGenericError(xmlGenericErrorContext,
11286 "PP: entering MISC\n");
11287 #endif
11288 }
11289 } else {
11290 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 ctxt->sax->setDocumentLocator(ctxt->userData,
11292 &xmlDefaultSAXLocator);
11293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294 if (ctxt->version == NULL) {
11295 xmlErrMemory(ctxt, NULL);
11296 break;
11297 }
11298 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 (!ctxt->disableSAX))
11300 ctxt->sax->startDocument(ctxt->userData);
11301 ctxt->instate = XML_PARSER_MISC;
11302 #ifdef DEBUG_PUSH
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: entering MISC\n");
11305 #endif
11306 }
11307 break;
11308 case XML_PARSER_START_TAG: {
11309 const xmlChar *name;
11310 const xmlChar *prefix = NULL;
11311 const xmlChar *URI = NULL;
11312 int nsNr = ctxt->nsNr;
11313
11314 if ((avail < 2) && (ctxt->inputNr == 1))
11315 goto done;
11316 cur = ctxt->input->cur[0];
11317 if (cur != '<') {
11318 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11319 xmlHaltParser(ctxt);
11320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 ctxt->sax->endDocument(ctxt->userData);
11322 goto done;
11323 }
11324 if (!terminate) {
11325 if (ctxt->progressive) {
11326 /* > can be found unescaped in attribute values */
11327 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11328 goto done;
11329 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 goto done;
11331 }
11332 }
11333 if (ctxt->spaceNr == 0)
11334 spacePush(ctxt, -1);
11335 else if (*ctxt->space == -2)
11336 spacePush(ctxt, -1);
11337 else
11338 spacePush(ctxt, *ctxt->space);
11339 #ifdef LIBXML_SAX1_ENABLED
11340 if (ctxt->sax2)
11341 #endif /* LIBXML_SAX1_ENABLED */
11342 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11343 #ifdef LIBXML_SAX1_ENABLED
11344 else
11345 name = xmlParseStartTag(ctxt);
11346 #endif /* LIBXML_SAX1_ENABLED */
11347 if (ctxt->instate == XML_PARSER_EOF)
11348 goto done;
11349 if (name == NULL) {
11350 spacePop(ctxt);
11351 xmlHaltParser(ctxt);
11352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 ctxt->sax->endDocument(ctxt->userData);
11354 goto done;
11355 }
11356 #ifdef LIBXML_VALID_ENABLED
11357 /*
11358 * [ VC: Root Element Type ]
11359 * The Name in the document type declaration must match
11360 * the element type of the root element.
11361 */
11362 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11365 #endif /* LIBXML_VALID_ENABLED */
11366
11367 /*
11368 * Check for an Empty Element.
11369 */
11370 if ((RAW == '/') && (NXT(1) == '>')) {
11371 SKIP(2);
11372
11373 if (ctxt->sax2) {
11374 if ((ctxt->sax != NULL) &&
11375 (ctxt->sax->endElementNs != NULL) &&
11376 (!ctxt->disableSAX))
11377 ctxt->sax->endElementNs(ctxt->userData, name,
11378 prefix, URI);
11379 if (ctxt->nsNr - nsNr > 0)
11380 nsPop(ctxt, ctxt->nsNr - nsNr);
11381 #ifdef LIBXML_SAX1_ENABLED
11382 } else {
11383 if ((ctxt->sax != NULL) &&
11384 (ctxt->sax->endElement != NULL) &&
11385 (!ctxt->disableSAX))
11386 ctxt->sax->endElement(ctxt->userData, name);
11387 #endif /* LIBXML_SAX1_ENABLED */
11388 }
11389 if (ctxt->instate == XML_PARSER_EOF)
11390 goto done;
11391 spacePop(ctxt);
11392 if (ctxt->nameNr == 0) {
11393 ctxt->instate = XML_PARSER_EPILOG;
11394 } else {
11395 ctxt->instate = XML_PARSER_CONTENT;
11396 }
11397 ctxt->progressive = 1;
11398 break;
11399 }
11400 if (RAW == '>') {
11401 NEXT;
11402 } else {
11403 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11404 "Couldn't find end of Start Tag %s\n",
11405 name);
11406 nodePop(ctxt);
11407 spacePop(ctxt);
11408 }
11409 if (ctxt->sax2)
11410 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11411 #ifdef LIBXML_SAX1_ENABLED
11412 else
11413 namePush(ctxt, name);
11414 #endif /* LIBXML_SAX1_ENABLED */
11415
11416 ctxt->instate = XML_PARSER_CONTENT;
11417 ctxt->progressive = 1;
11418 break;
11419 }
11420 case XML_PARSER_CONTENT: {
11421 const xmlChar *test;
11422 unsigned int cons;
11423 if ((avail < 2) && (ctxt->inputNr == 1))
11424 goto done;
11425 cur = ctxt->input->cur[0];
11426 next = ctxt->input->cur[1];
11427
11428 test = CUR_PTR;
11429 cons = ctxt->input->consumed;
11430 if ((cur == '<') && (next == '/')) {
11431 ctxt->instate = XML_PARSER_END_TAG;
11432 break;
11433 } else if ((cur == '<') && (next == '?')) {
11434 if ((!terminate) &&
11435 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436 ctxt->progressive = XML_PARSER_PI;
11437 goto done;
11438 }
11439 xmlParsePI(ctxt);
11440 ctxt->instate = XML_PARSER_CONTENT;
11441 ctxt->progressive = 1;
11442 } else if ((cur == '<') && (next != '!')) {
11443 ctxt->instate = XML_PARSER_START_TAG;
11444 break;
11445 } else if ((cur == '<') && (next == '!') &&
11446 (ctxt->input->cur[2] == '-') &&
11447 (ctxt->input->cur[3] == '-')) {
11448 int term;
11449
11450 if (avail < 4)
11451 goto done;
11452 ctxt->input->cur += 4;
11453 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 ctxt->input->cur -= 4;
11455 if ((!terminate) && (term < 0)) {
11456 ctxt->progressive = XML_PARSER_COMMENT;
11457 goto done;
11458 }
11459 xmlParseComment(ctxt);
11460 ctxt->instate = XML_PARSER_CONTENT;
11461 ctxt->progressive = 1;
11462 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 (ctxt->input->cur[2] == '[') &&
11464 (ctxt->input->cur[3] == 'C') &&
11465 (ctxt->input->cur[4] == 'D') &&
11466 (ctxt->input->cur[5] == 'A') &&
11467 (ctxt->input->cur[6] == 'T') &&
11468 (ctxt->input->cur[7] == 'A') &&
11469 (ctxt->input->cur[8] == '[')) {
11470 SKIP(9);
11471 ctxt->instate = XML_PARSER_CDATA_SECTION;
11472 break;
11473 } else if ((cur == '<') && (next == '!') &&
11474 (avail < 9)) {
11475 goto done;
11476 } else if (cur == '&') {
11477 if ((!terminate) &&
11478 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 goto done;
11480 xmlParseReference(ctxt);
11481 } else {
11482 /* TODO Avoid the extra copy, handle directly !!! */
11483 /*
11484 * Goal of the following test is:
11485 * - minimize calls to the SAX 'character' callback
11486 * when they are mergeable
11487 * - handle an problem for isBlank when we only parse
11488 * a sequence of blank chars and the next one is
11489 * not available to check against '<' presence.
11490 * - tries to homogenize the differences in SAX
11491 * callbacks between the push and pull versions
11492 * of the parser.
11493 */
11494 if ((ctxt->inputNr == 1) &&
11495 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 if (!terminate) {
11497 if (ctxt->progressive) {
11498 if ((lastlt == NULL) ||
11499 (ctxt->input->cur > lastlt))
11500 goto done;
11501 } else if (xmlParseLookupSequence(ctxt,
11502 '<', 0, 0) < 0) {
11503 goto done;
11504 }
11505 }
11506 }
11507 ctxt->checkIndex = 0;
11508 xmlParseCharData(ctxt, 0);
11509 }
11510 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 "detected an error in element content\n");
11513 xmlHaltParser(ctxt);
11514 break;
11515 }
11516 break;
11517 }
11518 case XML_PARSER_END_TAG:
11519 if (avail < 2)
11520 goto done;
11521 if (!terminate) {
11522 if (ctxt->progressive) {
11523 /* > can be found unescaped in attribute values */
11524 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11525 goto done;
11526 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 goto done;
11528 }
11529 }
11530 if (ctxt->sax2) {
11531 xmlParseEndTag2(ctxt,
11532 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 (int) (ptrdiff_t)
11535 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11536 nameNsPop(ctxt);
11537 }
11538 #ifdef LIBXML_SAX1_ENABLED
11539 else
11540 xmlParseEndTag1(ctxt, 0);
11541 #endif /* LIBXML_SAX1_ENABLED */
11542 if (ctxt->instate == XML_PARSER_EOF) {
11543 /* Nothing */
11544 } else if (ctxt->nameNr == 0) {
11545 ctxt->instate = XML_PARSER_EPILOG;
11546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
11548 }
11549 break;
11550 case XML_PARSER_CDATA_SECTION: {
11551 /*
11552 * The Push mode need to have the SAX callback for
11553 * cdataBlock merge back contiguous callbacks.
11554 */
11555 int base;
11556
11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 if (base < 0) {
11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11560 int tmp;
11561
11562 tmp = xmlCheckCdataPush(ctxt->input->cur,
11563 XML_PARSER_BIG_BUFFER_SIZE, 0);
11564 if (tmp < 0) {
11565 tmp = -tmp;
11566 ctxt->input->cur += tmp;
11567 goto encoding_error;
11568 }
11569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 if (ctxt->sax->cdataBlock != NULL)
11571 ctxt->sax->cdataBlock(ctxt->userData,
11572 ctxt->input->cur, tmp);
11573 else if (ctxt->sax->characters != NULL)
11574 ctxt->sax->characters(ctxt->userData,
11575 ctxt->input->cur, tmp);
11576 }
11577 if (ctxt->instate == XML_PARSER_EOF)
11578 goto done;
11579 SKIPL(tmp);
11580 ctxt->checkIndex = 0;
11581 }
11582 goto done;
11583 } else {
11584 int tmp;
11585
11586 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11587 if ((tmp < 0) || (tmp != base)) {
11588 tmp = -tmp;
11589 ctxt->input->cur += tmp;
11590 goto encoding_error;
11591 }
11592 if ((ctxt->sax != NULL) && (base == 0) &&
11593 (ctxt->sax->cdataBlock != NULL) &&
11594 (!ctxt->disableSAX)) {
11595 /*
11596 * Special case to provide identical behaviour
11597 * between pull and push parsers on enpty CDATA
11598 * sections
11599 */
11600 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 (!strncmp((const char *)&ctxt->input->cur[-9],
11602 "<![CDATA[", 9)))
11603 ctxt->sax->cdataBlock(ctxt->userData,
11604 BAD_CAST "", 0);
11605 } else if ((ctxt->sax != NULL) && (base > 0) &&
11606 (!ctxt->disableSAX)) {
11607 if (ctxt->sax->cdataBlock != NULL)
11608 ctxt->sax->cdataBlock(ctxt->userData,
11609 ctxt->input->cur, base);
11610 else if (ctxt->sax->characters != NULL)
11611 ctxt->sax->characters(ctxt->userData,
11612 ctxt->input->cur, base);
11613 }
11614 if (ctxt->instate == XML_PARSER_EOF)
11615 goto done;
11616 SKIPL(base + 3);
11617 ctxt->checkIndex = 0;
11618 ctxt->instate = XML_PARSER_CONTENT;
11619 #ifdef DEBUG_PUSH
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: entering CONTENT\n");
11622 #endif
11623 }
11624 break;
11625 }
11626 case XML_PARSER_MISC:
11627 SKIP_BLANKS;
11628 if (ctxt->input->buf == NULL)
11629 avail = ctxt->input->length -
11630 (ctxt->input->cur - ctxt->input->base);
11631 else
11632 avail = xmlBufUse(ctxt->input->buf->buffer) -
11633 (ctxt->input->cur - ctxt->input->base);
11634 if (avail < 2)
11635 goto done;
11636 cur = ctxt->input->cur[0];
11637 next = ctxt->input->cur[1];
11638 if ((cur == '<') && (next == '?')) {
11639 if ((!terminate) &&
11640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641 ctxt->progressive = XML_PARSER_PI;
11642 goto done;
11643 }
11644 #ifdef DEBUG_PUSH
11645 xmlGenericError(xmlGenericErrorContext,
11646 "PP: Parsing PI\n");
11647 #endif
11648 xmlParsePI(ctxt);
11649 if (ctxt->instate == XML_PARSER_EOF)
11650 goto done;
11651 ctxt->instate = XML_PARSER_MISC;
11652 ctxt->progressive = 1;
11653 ctxt->checkIndex = 0;
11654 } else if ((cur == '<') && (next == '!') &&
11655 (ctxt->input->cur[2] == '-') &&
11656 (ctxt->input->cur[3] == '-')) {
11657 if ((!terminate) &&
11658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659 ctxt->progressive = XML_PARSER_COMMENT;
11660 goto done;
11661 }
11662 #ifdef DEBUG_PUSH
11663 xmlGenericError(xmlGenericErrorContext,
11664 "PP: Parsing Comment\n");
11665 #endif
11666 xmlParseComment(ctxt);
11667 if (ctxt->instate == XML_PARSER_EOF)
11668 goto done;
11669 ctxt->instate = XML_PARSER_MISC;
11670 ctxt->progressive = 1;
11671 ctxt->checkIndex = 0;
11672 } else if ((cur == '<') && (next == '!') &&
11673 (ctxt->input->cur[2] == 'D') &&
11674 (ctxt->input->cur[3] == 'O') &&
11675 (ctxt->input->cur[4] == 'C') &&
11676 (ctxt->input->cur[5] == 'T') &&
11677 (ctxt->input->cur[6] == 'Y') &&
11678 (ctxt->input->cur[7] == 'P') &&
11679 (ctxt->input->cur[8] == 'E')) {
11680 if ((!terminate) &&
11681 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682 ctxt->progressive = XML_PARSER_DTD;
11683 goto done;
11684 }
11685 #ifdef DEBUG_PUSH
11686 xmlGenericError(xmlGenericErrorContext,
11687 "PP: Parsing internal subset\n");
11688 #endif
11689 ctxt->inSubset = 1;
11690 ctxt->progressive = 0;
11691 ctxt->checkIndex = 0;
11692 xmlParseDocTypeDecl(ctxt);
11693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
11695 if (RAW == '[') {
11696 ctxt->instate = XML_PARSER_DTD;
11697 #ifdef DEBUG_PUSH
11698 xmlGenericError(xmlGenericErrorContext,
11699 "PP: entering DTD\n");
11700 #endif
11701 } else {
11702 /*
11703 * Create and update the external subset.
11704 */
11705 ctxt->inSubset = 2;
11706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 (ctxt->sax->externalSubset != NULL))
11708 ctxt->sax->externalSubset(ctxt->userData,
11709 ctxt->intSubName, ctxt->extSubSystem,
11710 ctxt->extSubURI);
11711 ctxt->inSubset = 0;
11712 xmlCleanSpecialAttr(ctxt);
11713 ctxt->instate = XML_PARSER_PROLOG;
11714 #ifdef DEBUG_PUSH
11715 xmlGenericError(xmlGenericErrorContext,
11716 "PP: entering PROLOG\n");
11717 #endif
11718 }
11719 } else if ((cur == '<') && (next == '!') &&
11720 (avail < 9)) {
11721 goto done;
11722 } else {
11723 ctxt->instate = XML_PARSER_START_TAG;
11724 ctxt->progressive = XML_PARSER_START_TAG;
11725 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11726 #ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering START_TAG\n");
11729 #endif
11730 }
11731 break;
11732 case XML_PARSER_PROLOG:
11733 SKIP_BLANKS;
11734 if (ctxt->input->buf == NULL)
11735 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 else
11737 avail = xmlBufUse(ctxt->input->buf->buffer) -
11738 (ctxt->input->cur - ctxt->input->base);
11739 if (avail < 2)
11740 goto done;
11741 cur = ctxt->input->cur[0];
11742 next = ctxt->input->cur[1];
11743 if ((cur == '<') && (next == '?')) {
11744 if ((!terminate) &&
11745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746 ctxt->progressive = XML_PARSER_PI;
11747 goto done;
11748 }
11749 #ifdef DEBUG_PUSH
11750 xmlGenericError(xmlGenericErrorContext,
11751 "PP: Parsing PI\n");
11752 #endif
11753 xmlParsePI(ctxt);
11754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
11756 ctxt->instate = XML_PARSER_PROLOG;
11757 ctxt->progressive = 1;
11758 } else if ((cur == '<') && (next == '!') &&
11759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 if ((!terminate) &&
11761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762 ctxt->progressive = XML_PARSER_COMMENT;
11763 goto done;
11764 }
11765 #ifdef DEBUG_PUSH
11766 xmlGenericError(xmlGenericErrorContext,
11767 "PP: Parsing Comment\n");
11768 #endif
11769 xmlParseComment(ctxt);
11770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
11772 ctxt->instate = XML_PARSER_PROLOG;
11773 ctxt->progressive = 1;
11774 } else if ((cur == '<') && (next == '!') &&
11775 (avail < 4)) {
11776 goto done;
11777 } else {
11778 ctxt->instate = XML_PARSER_START_TAG;
11779 if (ctxt->progressive == 0)
11780 ctxt->progressive = XML_PARSER_START_TAG;
11781 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11782 #ifdef DEBUG_PUSH
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: entering START_TAG\n");
11785 #endif
11786 }
11787 break;
11788 case XML_PARSER_EPILOG:
11789 SKIP_BLANKS;
11790 if (ctxt->input->buf == NULL)
11791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 else
11793 avail = xmlBufUse(ctxt->input->buf->buffer) -
11794 (ctxt->input->cur - ctxt->input->base);
11795 if (avail < 2)
11796 goto done;
11797 cur = ctxt->input->cur[0];
11798 next = ctxt->input->cur[1];
11799 if ((cur == '<') && (next == '?')) {
11800 if ((!terminate) &&
11801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802 ctxt->progressive = XML_PARSER_PI;
11803 goto done;
11804 }
11805 #ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing PI\n");
11808 #endif
11809 xmlParsePI(ctxt);
11810 if (ctxt->instate == XML_PARSER_EOF)
11811 goto done;
11812 ctxt->instate = XML_PARSER_EPILOG;
11813 ctxt->progressive = 1;
11814 } else if ((cur == '<') && (next == '!') &&
11815 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 if ((!terminate) &&
11817 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818 ctxt->progressive = XML_PARSER_COMMENT;
11819 goto done;
11820 }
11821 #ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: Parsing Comment\n");
11824 #endif
11825 xmlParseComment(ctxt);
11826 if (ctxt->instate == XML_PARSER_EOF)
11827 goto done;
11828 ctxt->instate = XML_PARSER_EPILOG;
11829 ctxt->progressive = 1;
11830 } else if ((cur == '<') && (next == '!') &&
11831 (avail < 4)) {
11832 goto done;
11833 } else {
11834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11835 xmlHaltParser(ctxt);
11836 #ifdef DEBUG_PUSH
11837 xmlGenericError(xmlGenericErrorContext,
11838 "PP: entering EOF\n");
11839 #endif
11840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11841 ctxt->sax->endDocument(ctxt->userData);
11842 goto done;
11843 }
11844 break;
11845 case XML_PARSER_DTD: {
11846 /*
11847 * Sorry but progressive parsing of the internal subset
11848 * is not expected to be supported. We first check that
11849 * the full content of the internal subset is available and
11850 * the parsing is launched only at that point.
11851 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 * section and not in a ']]>' sequence which are conditional
11853 * sections (whoever argued to keep that crap in XML deserve
11854 * a place in hell !).
11855 */
11856 int base, i;
11857 xmlChar *buf;
11858 xmlChar quote = 0;
11859 size_t use;
11860
11861 base = ctxt->input->cur - ctxt->input->base;
11862 if (base < 0) return(0);
11863 if (ctxt->checkIndex > base)
11864 base = ctxt->checkIndex;
11865 buf = xmlBufContent(ctxt->input->buf->buffer);
11866 use = xmlBufUse(ctxt->input->buf->buffer);
11867 for (;(unsigned int) base < use; base++) {
11868 if (quote != 0) {
11869 if (buf[base] == quote)
11870 quote = 0;
11871 continue;
11872 }
11873 if ((quote == 0) && (buf[base] == '<')) {
11874 int found = 0;
11875 /* special handling of comments */
11876 if (((unsigned int) base + 4 < use) &&
11877 (buf[base + 1] == '!') &&
11878 (buf[base + 2] == '-') &&
11879 (buf[base + 3] == '-')) {
11880 for (;(unsigned int) base + 3 < use; base++) {
11881 if ((buf[base] == '-') &&
11882 (buf[base + 1] == '-') &&
11883 (buf[base + 2] == '>')) {
11884 found = 1;
11885 base += 2;
11886 break;
11887 }
11888 }
11889 if (!found) {
11890 #if 0
11891 fprintf(stderr, "unfinished comment\n");
11892 #endif
11893 break; /* for */
11894 }
11895 continue;
11896 }
11897 }
11898 if (buf[base] == '"') {
11899 quote = '"';
11900 continue;
11901 }
11902 if (buf[base] == '\'') {
11903 quote = '\'';
11904 continue;
11905 }
11906 if (buf[base] == ']') {
11907 #if 0
11908 fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 buf[base + 1], buf[base + 2], buf[base + 3]);
11910 #endif
11911 if ((unsigned int) base +1 >= use)
11912 break;
11913 if (buf[base + 1] == ']') {
11914 /* conditional crap, skip both ']' ! */
11915 base++;
11916 continue;
11917 }
11918 for (i = 1; (unsigned int) base + i < use; i++) {
11919 if (buf[base + i] == '>') {
11920 #if 0
11921 fprintf(stderr, "found\n");
11922 #endif
11923 goto found_end_int_subset;
11924 }
11925 if (!IS_BLANK_CH(buf[base + i])) {
11926 #if 0
11927 fprintf(stderr, "not found\n");
11928 #endif
11929 goto not_end_of_int_subset;
11930 }
11931 }
11932 #if 0
11933 fprintf(stderr, "end of stream\n");
11934 #endif
11935 break;
11936
11937 }
11938 not_end_of_int_subset:
11939 continue; /* for */
11940 }
11941 /*
11942 * We didn't found the end of the Internal subset
11943 */
11944 if (quote == 0)
11945 ctxt->checkIndex = base;
11946 else
11947 ctxt->checkIndex = 0;
11948 #ifdef DEBUG_PUSH
11949 if (next == 0)
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: lookup of int subset end filed\n");
11952 #endif
11953 goto done;
11954
11955 found_end_int_subset:
11956 ctxt->checkIndex = 0;
11957 xmlParseInternalSubset(ctxt);
11958 if (ctxt->instate == XML_PARSER_EOF)
11959 goto done;
11960 ctxt->inSubset = 2;
11961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 (ctxt->sax->externalSubset != NULL))
11963 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 ctxt->extSubSystem, ctxt->extSubURI);
11965 ctxt->inSubset = 0;
11966 xmlCleanSpecialAttr(ctxt);
11967 if (ctxt->instate == XML_PARSER_EOF)
11968 goto done;
11969 ctxt->instate = XML_PARSER_PROLOG;
11970 ctxt->checkIndex = 0;
11971 #ifdef DEBUG_PUSH
11972 xmlGenericError(xmlGenericErrorContext,
11973 "PP: entering PROLOG\n");
11974 #endif
11975 break;
11976 }
11977 case XML_PARSER_COMMENT:
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: internal error, state == COMMENT\n");
11980 ctxt->instate = XML_PARSER_CONTENT;
11981 #ifdef DEBUG_PUSH
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: entering CONTENT\n");
11984 #endif
11985 break;
11986 case XML_PARSER_IGNORE:
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: internal error, state == IGNORE");
11989 ctxt->instate = XML_PARSER_DTD;
11990 #ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering DTD\n");
11993 #endif
11994 break;
11995 case XML_PARSER_PI:
11996 xmlGenericError(xmlGenericErrorContext,
11997 "PP: internal error, state == PI\n");
11998 ctxt->instate = XML_PARSER_CONTENT;
11999 #ifdef DEBUG_PUSH
12000 xmlGenericError(xmlGenericErrorContext,
12001 "PP: entering CONTENT\n");
12002 #endif
12003 break;
12004 case XML_PARSER_ENTITY_DECL:
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: internal error, state == ENTITY_DECL\n");
12007 ctxt->instate = XML_PARSER_DTD;
12008 #ifdef DEBUG_PUSH
12009 xmlGenericError(xmlGenericErrorContext,
12010 "PP: entering DTD\n");
12011 #endif
12012 break;
12013 case XML_PARSER_ENTITY_VALUE:
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: internal error, state == ENTITY_VALUE\n");
12016 ctxt->instate = XML_PARSER_CONTENT;
12017 #ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: entering DTD\n");
12020 #endif
12021 break;
12022 case XML_PARSER_ATTRIBUTE_VALUE:
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 ctxt->instate = XML_PARSER_START_TAG;
12026 #ifdef DEBUG_PUSH
12027 xmlGenericError(xmlGenericErrorContext,
12028 "PP: entering START_TAG\n");
12029 #endif
12030 break;
12031 case XML_PARSER_SYSTEM_LITERAL:
12032 xmlGenericError(xmlGenericErrorContext,
12033 "PP: internal error, state == SYSTEM_LITERAL\n");
12034 ctxt->instate = XML_PARSER_START_TAG;
12035 #ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038 #endif
12039 break;
12040 case XML_PARSER_PUBLIC_LITERAL:
12041 xmlGenericError(xmlGenericErrorContext,
12042 "PP: internal error, state == PUBLIC_LITERAL\n");
12043 ctxt->instate = XML_PARSER_START_TAG;
12044 #ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: entering START_TAG\n");
12047 #endif
12048 break;
12049 }
12050 }
12051 done:
12052 #ifdef DEBUG_PUSH
12053 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054 #endif
12055 return(ret);
12056 encoding_error:
12057 {
12058 char buffer[150];
12059
12060 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 ctxt->input->cur[0], ctxt->input->cur[1],
12062 ctxt->input->cur[2], ctxt->input->cur[3]);
12063 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 "Input is not proper UTF-8, indicate encoding !\n%s",
12065 BAD_CAST buffer, NULL);
12066 }
12067 return(0);
12068 }
12069
12070 /**
12071 * xmlParseCheckTransition:
12072 * @ctxt: an XML parser context
12073 * @chunk: a char array
12074 * @size: the size in byte of the chunk
12075 *
12076 * Check depending on the current parser state if the chunk given must be
12077 * processed immediately or one need more data to advance on parsing.
12078 *
12079 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080 */
12081 static int
xmlParseCheckTransition(xmlParserCtxtPtr ctxt,const char * chunk,int size)12082 xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084 return(-1);
12085 if (ctxt->instate == XML_PARSER_START_TAG) {
12086 if (memchr(chunk, '>', size) != NULL)
12087 return(1);
12088 return(0);
12089 }
12090 if (ctxt->progressive == XML_PARSER_COMMENT) {
12091 if (memchr(chunk, '>', size) != NULL)
12092 return(1);
12093 return(0);
12094 }
12095 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096 if (memchr(chunk, '>', size) != NULL)
12097 return(1);
12098 return(0);
12099 }
12100 if (ctxt->progressive == XML_PARSER_PI) {
12101 if (memchr(chunk, '>', size) != NULL)
12102 return(1);
12103 return(0);
12104 }
12105 if (ctxt->instate == XML_PARSER_END_TAG) {
12106 if (memchr(chunk, '>', size) != NULL)
12107 return(1);
12108 return(0);
12109 }
12110 if ((ctxt->progressive == XML_PARSER_DTD) ||
12111 (ctxt->instate == XML_PARSER_DTD)) {
12112 if (memchr(chunk, '>', size) != NULL)
12113 return(1);
12114 return(0);
12115 }
12116 return(1);
12117 }
12118
/**
 * xmlParseChunk:
 * @ctxt:  an XML parser context
 * @chunk:  a char array
 * @size:  the size in bytes of the chunk
 * @terminate:  last chunk indicator
 *
 * Parse a chunk of memory: the bytes are appended to the input buffer of
 * the current input and the push parser is run on what is available.
 * When @terminate is set the document is considered complete, the end of
 * document checks are done and the context is moved to XML_PARSER_EOF.
 *
 * Returns zero if no error, the xmlParserErrors otherwise. Two paths
 *         return other values: -1 when the context is already in the
 *         XML_PARSER_EOF state on entry, and XML_PARSER_EOF when pushing
 *         the data into the input buffer fails.
 */
int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
              int terminate) {
    int end_in_lf = 0;          /* set when a trailing '\r' was held back */
    int remain = 0;             /* bytes of @chunk left for a second pass */
    size_t old_avail = 0;       /* buffer usage before this push */
    size_t avail = 0;           /* buffer usage after this push */

    if (ctxt == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    /* A previous error which disabled SAX is simply reported again. */
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);
    if (ctxt->instate == XML_PARSER_EOF)
        return(-1);
    if (ctxt->instate == XML_PARSER_START)
        xmlDetectSAX2(ctxt);
    /*
     * If this is not the last chunk and it ends with '\r', hold that
     * byte back: it is pushed into the buffer only after the rest of the
     * chunk has been parsed (see the end_in_lf handling below).
     */
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
        (chunk[size - 1] == '\r')) {
        end_in_lf = 1;
        size--;
    }

    /* Re-entered through goto once to push the bytes counted in remain. */
xmldecl_done:

    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
        /*
         * Record the input position as offsets so that it can be
         * re-established with xmlBufSetInputBaseCur() after the push.
         */
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
        size_t cur = ctxt->input->cur - ctxt->input->base;
        int res;

        old_avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Specific handling if we autodetected an encoding, we should not
         * push more than the first line ... which depends on the encoding
         * And only push the rest once the final encoding was detected
         */
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* 45 bytes by default, 90 for UTF-16 and 180 for UCS-4 */
            unsigned int len = 45;

            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF-16")) ||
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                               BAD_CAST "UTF16")))
                len = 90;
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS-4")) ||
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
                                    BAD_CAST "UCS4")))
                len = 180;

            /* Discount the raw bytes which were already consumed. */
            if (ctxt->input->buf->rawconsumed < len)
                len -= ctxt->input->buf->rawconsumed;

            /*
             * Change size for reading the initial declaration only
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
             * will blindly copy extra bytes from memory.
             */
            if ((unsigned int) size > len) {
                remain = size - len;
                size = len;
            } else {
                remain = 0;
            }
        }
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
        if (res < 0) {
            /*
             * NOTE(review): XML_PARSER_EOF is a parser state value, not
             * an xmlParserErrors code; it is what gets stored and
             * returned here.
             */
            ctxt->errNo = XML_PARSER_EOF;
            xmlHaltParser(ctxt);
            return (XML_PARSER_EOF);
        }
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
#ifdef DEBUG_PUSH
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
#endif

    } else if (ctxt->instate != XML_PARSER_EOF) {
        /*
         * Nothing to push: if the input has an encoder and a raw buffer,
         * run the conversion on what is pending.
         */
        if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
            xmlParserInputBufferPtr in = ctxt->input->buf;
            if ((in->encoder != NULL) && (in->buffer != NULL) &&
                    (in->raw != NULL)) {
                int nbchars;
                size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
                size_t current = ctxt->input->cur - ctxt->input->base;

                nbchars = xmlCharEncInput(in, terminate);
                if (nbchars < 0) {
                    /* TODO 2.6.0 */
                    xmlGenericError(xmlGenericErrorContext,
                                    "xmlParseChunk: encoder error\n");
                    xmlHaltParser(ctxt);
                    return(XML_ERR_INVALID_ENCODING);
                }
                xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
            }
        }
    }
    if (remain != 0) {
        /* Only the start of the chunk was pushed: never terminate here. */
        xmlParseTryOrFinish(ctxt, 0);
    } else {
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
            avail = xmlBufUse(ctxt->input->buf->buffer);
        /*
         * Depending on the current state it may not be such
         * a good idea to try parsing if there is nothing in the chunk
         * which would be worth doing a parser state transition and we
         * need to wait for more data
         */
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
            (old_avail == 0) || (avail == 0) ||
            (xmlParseCheckTransition(ctxt,
                       (const char *)&ctxt->input->base[old_avail],
                                     avail - old_avail)))
            xmlParseTryOrFinish(ctxt, terminate);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        return(ctxt->errNo);

    /*
     * Unless XML_PARSE_HUGE is set, stop when either the unparsed or the
     * already parsed part of the input exceeds XML_MAX_LOOKUP_LIMIT.
     */
    if ((ctxt->input != NULL) &&
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
    }
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
        return(ctxt->errNo);

    /* Second pass: push what was left out of the first, limited, push. */
    if (remain != 0) {
        chunk += size;
        size = remain;
        remain = 0;
        goto xmldecl_done;
    }
    /* Now add the '\r' which was held back at the beginning. */
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL)) {
        size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
                                         ctxt->input);
        size_t current = ctxt->input->cur - ctxt->input->base;

        xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");

        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
                              base, current);
    }
    if (terminate) {
        /*
         * Check for termination
         */
        int cur_avail = 0;      /* bytes left unparsed in the input */

        if (ctxt->input != NULL) {
            if (ctxt->input->buf == NULL)
                cur_avail = ctxt->input->length -
                            (ctxt->input->cur - ctxt->input->base);
            else
                cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
                                      (ctxt->input->cur - ctxt->input->base);
        }

        /* The document must have reached the epilog ... */
        if ((ctxt->instate != XML_PARSER_EOF) &&
            (ctxt->instate != XML_PARSER_EPILOG)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        /* ... and nothing may be left unparsed after it. */
        if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        if (ctxt->instate != XML_PARSER_EOF) {
            if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
                ctxt->sax->endDocument(ctxt->userData);
        }
        ctxt->instate = XML_PARSER_EOF;
    }
    if (ctxt->wellFormed == 0)
        return((xmlParserErrors) ctxt->errNo);
    else
        return(0);
}
12309
12310 /************************************************************************
12311 * *
12312 * I/O front end functions to the parser *
12313 * *
12314 ************************************************************************/
12315
12316 /**
12317 * xmlCreatePushParserCtxt:
12318 * @sax: a SAX handler
12319 * @user_data: The user data returned on SAX callbacks
12320 * @chunk: a pointer to an array of chars
12321 * @size: number of chars in the array
12322 * @filename: an optional file name or URI
12323 *
12324 * Create a parser context for using the XML parser in push mode.
12325 * If @buffer and @size are non-NULL, the data is used to detect
12326 * the encoding. The remaining characters will be parsed so they
12327 * don't need to be fed in again through xmlParseChunk.
12328 * To allow content encoding detection, @size should be >= 4
12329 * The value of @filename is used for fetching external entities
12330 * and error/warning reports.
12331 *
12332 * Returns the new parser context or NULL
12333 */
12334
12335 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12336 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12337 const char *chunk, int size, const char *filename) {
12338 xmlParserCtxtPtr ctxt;
12339 xmlParserInputPtr inputStream;
12340 xmlParserInputBufferPtr buf;
12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342
12343 /*
12344 * plug some encoding conversion routines
12345 */
12346 if ((chunk != NULL) && (size >= 4))
12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348
12349 buf = xmlAllocParserInputBuffer(enc);
12350 if (buf == NULL) return(NULL);
12351
12352 ctxt = xmlNewParserCtxt();
12353 if (ctxt == NULL) {
12354 xmlErrMemory(NULL, "creating parser: out of memory\n");
12355 xmlFreeParserInputBuffer(buf);
12356 return(NULL);
12357 }
12358 ctxt->dictNames = 1;
12359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360 if (ctxt->pushTab == NULL) {
12361 xmlErrMemory(ctxt, NULL);
12362 xmlFreeParserInputBuffer(buf);
12363 xmlFreeParserCtxt(ctxt);
12364 return(NULL);
12365 }
12366 if (sax != NULL) {
12367 #ifdef LIBXML_SAX1_ENABLED
12368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12369 #endif /* LIBXML_SAX1_ENABLED */
12370 xmlFree(ctxt->sax);
12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 if (ctxt->sax == NULL) {
12373 xmlErrMemory(ctxt, NULL);
12374 xmlFreeParserInputBuffer(buf);
12375 xmlFreeParserCtxt(ctxt);
12376 return(NULL);
12377 }
12378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 if (sax->initialized == XML_SAX2_MAGIC)
12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 else
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12383 if (user_data != NULL)
12384 ctxt->userData = user_data;
12385 }
12386 if (filename == NULL) {
12387 ctxt->directory = NULL;
12388 } else {
12389 ctxt->directory = xmlParserGetDirectory(filename);
12390 }
12391
12392 inputStream = xmlNewInputStream(ctxt);
12393 if (inputStream == NULL) {
12394 xmlFreeParserCtxt(ctxt);
12395 xmlFreeParserInputBuffer(buf);
12396 return(NULL);
12397 }
12398
12399 if (filename == NULL)
12400 inputStream->filename = NULL;
12401 else {
12402 inputStream->filename = (char *)
12403 xmlCanonicPath((const xmlChar *) filename);
12404 if (inputStream->filename == NULL) {
12405 xmlFreeParserCtxt(ctxt);
12406 xmlFreeParserInputBuffer(buf);
12407 return(NULL);
12408 }
12409 }
12410 inputStream->buf = buf;
12411 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12412 inputPush(ctxt, inputStream);
12413
12414 /*
12415 * If the caller didn't provide an initial 'chunk' for determining
12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417 * that it can be automatically determined later
12418 */
12419 if ((size == 0) || (chunk == NULL)) {
12420 ctxt->charset = XML_CHAR_ENCODING_NONE;
12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 size_t cur = ctxt->input->cur - ctxt->input->base;
12424
12425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12426
12427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12428 #ifdef DEBUG_PUSH
12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430 #endif
12431 }
12432
12433 if (enc != XML_CHAR_ENCODING_NONE) {
12434 xmlSwitchEncoding(ctxt, enc);
12435 }
12436
12437 return(ctxt);
12438 }
12439 #endif /* LIBXML_PUSH_ENABLED */
12440
12441 /**
12442 * xmlHaltParser:
12443 * @ctxt: an XML parser context
12444 *
12445 * Blocks further parser processing don't override error
12446 * for internal use
12447 */
12448 static void
xmlHaltParser(xmlParserCtxtPtr ctxt)12449 xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450 if (ctxt == NULL)
12451 return;
12452 ctxt->instate = XML_PARSER_EOF;
12453 ctxt->disableSAX = 1;
12454 while (ctxt->inputNr > 1)
12455 xmlFreeInputStream(inputPop(ctxt));
12456 if (ctxt->input != NULL) {
12457 /*
12458 * in case there was a specific allocation deallocate before
12459 * overriding base
12460 */
12461 if (ctxt->input->free != NULL) {
12462 ctxt->input->free((xmlChar *) ctxt->input->base);
12463 ctxt->input->free = NULL;
12464 }
12465 if (ctxt->input->buf != NULL) {
12466 xmlFreeParserInputBuffer(ctxt->input->buf);
12467 ctxt->input->buf = NULL;
12468 }
12469 ctxt->input->cur = BAD_CAST"";
12470 ctxt->input->length = 0;
12471 ctxt->input->base = ctxt->input->cur;
12472 ctxt->input->end = ctxt->input->cur;
12473 }
12474 }
12475
12476 /**
12477 * xmlStopParser:
12478 * @ctxt: an XML parser context
12479 *
12480 * Blocks further parser processing
12481 */
12482 void
xmlStopParser(xmlParserCtxtPtr ctxt)12483 xmlStopParser(xmlParserCtxtPtr ctxt) {
12484 if (ctxt == NULL)
12485 return;
12486 xmlHaltParser(ctxt);
12487 ctxt->errNo = XML_ERR_USER_STOP;
12488 }
12489
12490 /**
12491 * xmlCreateIOParserCtxt:
12492 * @sax: a SAX handler
12493 * @user_data: The user data returned on SAX callbacks
12494 * @ioread: an I/O read function
12495 * @ioclose: an I/O close function
12496 * @ioctx: an I/O handler
12497 * @enc: the charset encoding if known
12498 *
12499 * Create a parser context for using the XML parser with an existing
12500 * I/O stream
12501 *
12502 * Returns the new parser context or NULL
12503 */
12504 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12505 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12506 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12507 void *ioctx, xmlCharEncoding enc) {
12508 xmlParserCtxtPtr ctxt;
12509 xmlParserInputPtr inputStream;
12510 xmlParserInputBufferPtr buf;
12511
12512 if (ioread == NULL) return(NULL);
12513
12514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12515 if (buf == NULL) {
12516 if (ioclose != NULL)
12517 ioclose(ioctx);
12518 return (NULL);
12519 }
12520
12521 ctxt = xmlNewParserCtxt();
12522 if (ctxt == NULL) {
12523 xmlFreeParserInputBuffer(buf);
12524 return(NULL);
12525 }
12526 if (sax != NULL) {
12527 #ifdef LIBXML_SAX1_ENABLED
12528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12529 #endif /* LIBXML_SAX1_ENABLED */
12530 xmlFree(ctxt->sax);
12531 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12532 if (ctxt->sax == NULL) {
12533 xmlErrMemory(ctxt, NULL);
12534 xmlFreeParserCtxt(ctxt);
12535 return(NULL);
12536 }
12537 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12538 if (sax->initialized == XML_SAX2_MAGIC)
12539 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12540 else
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12542 if (user_data != NULL)
12543 ctxt->userData = user_data;
12544 }
12545
12546 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12547 if (inputStream == NULL) {
12548 xmlFreeParserCtxt(ctxt);
12549 return(NULL);
12550 }
12551 inputPush(ctxt, inputStream);
12552
12553 return(ctxt);
12554 }
12555
12556 #ifdef LIBXML_VALID_ENABLED
12557 /************************************************************************
12558 * *
12559 * Front ends when parsing a DTD *
12560 * *
12561 ************************************************************************/
12562
12563 /**
12564 * xmlIOParseDTD:
12565 * @sax: the SAX handler block or NULL
12566 * @input: an Input Buffer
12567 * @enc: the charset encoding if known
12568 *
12569 * Load and parse a DTD
12570 *
12571 * Returns the resulting xmlDtdPtr or NULL in case of error.
12572 * @input will be freed by the function in any case.
12573 */
12574
12575 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12576 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12577 xmlCharEncoding enc) {
12578 xmlDtdPtr ret = NULL;
12579 xmlParserCtxtPtr ctxt;
12580 xmlParserInputPtr pinput = NULL;
12581 xmlChar start[4];
12582
12583 if (input == NULL)
12584 return(NULL);
12585
12586 ctxt = xmlNewParserCtxt();
12587 if (ctxt == NULL) {
12588 xmlFreeParserInputBuffer(input);
12589 return(NULL);
12590 }
12591
12592 /* We are loading a DTD */
12593 ctxt->options |= XML_PARSE_DTDLOAD;
12594
12595 /*
12596 * Set-up the SAX context
12597 */
12598 if (sax != NULL) {
12599 if (ctxt->sax != NULL)
12600 xmlFree(ctxt->sax);
12601 ctxt->sax = sax;
12602 ctxt->userData = ctxt;
12603 }
12604 xmlDetectSAX2(ctxt);
12605
12606 /*
12607 * generate a parser input from the I/O handler
12608 */
12609
12610 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12611 if (pinput == NULL) {
12612 if (sax != NULL) ctxt->sax = NULL;
12613 xmlFreeParserInputBuffer(input);
12614 xmlFreeParserCtxt(ctxt);
12615 return(NULL);
12616 }
12617
12618 /*
12619 * plug some encoding conversion routines here.
12620 */
12621 if (xmlPushInput(ctxt, pinput) < 0) {
12622 if (sax != NULL) ctxt->sax = NULL;
12623 xmlFreeParserCtxt(ctxt);
12624 return(NULL);
12625 }
12626 if (enc != XML_CHAR_ENCODING_NONE) {
12627 xmlSwitchEncoding(ctxt, enc);
12628 }
12629
12630 pinput->filename = NULL;
12631 pinput->line = 1;
12632 pinput->col = 1;
12633 pinput->base = ctxt->input->cur;
12634 pinput->cur = ctxt->input->cur;
12635 pinput->free = NULL;
12636
12637 /*
12638 * let's parse that entity knowing it's an external subset.
12639 */
12640 ctxt->inSubset = 2;
12641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12642 if (ctxt->myDoc == NULL) {
12643 xmlErrMemory(ctxt, "New Doc failed");
12644 return(NULL);
12645 }
12646 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12647 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12648 BAD_CAST "none", BAD_CAST "none");
12649
12650 if ((enc == XML_CHAR_ENCODING_NONE) &&
12651 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12652 /*
12653 * Get the 4 first bytes and decode the charset
12654 * if enc != XML_CHAR_ENCODING_NONE
12655 * plug some encoding conversion routines.
12656 */
12657 start[0] = RAW;
12658 start[1] = NXT(1);
12659 start[2] = NXT(2);
12660 start[3] = NXT(3);
12661 enc = xmlDetectCharEncoding(start, 4);
12662 if (enc != XML_CHAR_ENCODING_NONE) {
12663 xmlSwitchEncoding(ctxt, enc);
12664 }
12665 }
12666
12667 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668
12669 if (ctxt->myDoc != NULL) {
12670 if (ctxt->wellFormed) {
12671 ret = ctxt->myDoc->extSubset;
12672 ctxt->myDoc->extSubset = NULL;
12673 if (ret != NULL) {
12674 xmlNodePtr tmp;
12675
12676 ret->doc = NULL;
12677 tmp = ret->children;
12678 while (tmp != NULL) {
12679 tmp->doc = NULL;
12680 tmp = tmp->next;
12681 }
12682 }
12683 } else {
12684 ret = NULL;
12685 }
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
12688 }
12689 if (sax != NULL) ctxt->sax = NULL;
12690 xmlFreeParserCtxt(ctxt);
12691
12692 return(ret);
12693 }
12694
12695 /**
12696 * xmlSAXParseDTD:
12697 * @sax: the SAX handler block
12698 * @ExternalID: a NAME* containing the External ID of the DTD
12699 * @SystemID: a NAME* containing the URL to the DTD
12700 *
12701 * Load and parse an external subset.
12702 *
12703 * Returns the resulting xmlDtdPtr or NULL in case of error.
12704 */
12705
12706 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12707 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12708 const xmlChar *SystemID) {
12709 xmlDtdPtr ret = NULL;
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr input = NULL;
12712 xmlCharEncoding enc;
12713 xmlChar* systemIdCanonic;
12714
12715 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716
12717 ctxt = xmlNewParserCtxt();
12718 if (ctxt == NULL) {
12719 return(NULL);
12720 }
12721
12722 /* We are loading a DTD */
12723 ctxt->options |= XML_PARSE_DTDLOAD;
12724
12725 /*
12726 * Set-up the SAX context
12727 */
12728 if (sax != NULL) {
12729 if (ctxt->sax != NULL)
12730 xmlFree(ctxt->sax);
12731 ctxt->sax = sax;
12732 ctxt->userData = ctxt;
12733 }
12734
12735 /*
12736 * Canonicalise the system ID
12737 */
12738 systemIdCanonic = xmlCanonicPath(SystemID);
12739 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12740 xmlFreeParserCtxt(ctxt);
12741 return(NULL);
12742 }
12743
12744 /*
12745 * Ask the Entity resolver to load the damn thing
12746 */
12747
12748 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12749 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12750 systemIdCanonic);
12751 if (input == NULL) {
12752 if (sax != NULL) ctxt->sax = NULL;
12753 xmlFreeParserCtxt(ctxt);
12754 if (systemIdCanonic != NULL)
12755 xmlFree(systemIdCanonic);
12756 return(NULL);
12757 }
12758
12759 /*
12760 * plug some encoding conversion routines here.
12761 */
12762 if (xmlPushInput(ctxt, input) < 0) {
12763 if (sax != NULL) ctxt->sax = NULL;
12764 xmlFreeParserCtxt(ctxt);
12765 if (systemIdCanonic != NULL)
12766 xmlFree(systemIdCanonic);
12767 return(NULL);
12768 }
12769 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771 xmlSwitchEncoding(ctxt, enc);
12772 }
12773
12774 if (input->filename == NULL)
12775 input->filename = (char *) systemIdCanonic;
12776 else
12777 xmlFree(systemIdCanonic);
12778 input->line = 1;
12779 input->col = 1;
12780 input->base = ctxt->input->cur;
12781 input->cur = ctxt->input->cur;
12782 input->free = NULL;
12783
12784 /*
12785 * let's parse that entity knowing it's an external subset.
12786 */
12787 ctxt->inSubset = 2;
12788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12789 if (ctxt->myDoc == NULL) {
12790 xmlErrMemory(ctxt, "New Doc failed");
12791 if (sax != NULL) ctxt->sax = NULL;
12792 xmlFreeParserCtxt(ctxt);
12793 return(NULL);
12794 }
12795 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12796 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797 ExternalID, SystemID);
12798 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12799
12800 if (ctxt->myDoc != NULL) {
12801 if (ctxt->wellFormed) {
12802 ret = ctxt->myDoc->extSubset;
12803 ctxt->myDoc->extSubset = NULL;
12804 if (ret != NULL) {
12805 xmlNodePtr tmp;
12806
12807 ret->doc = NULL;
12808 tmp = ret->children;
12809 while (tmp != NULL) {
12810 tmp->doc = NULL;
12811 tmp = tmp->next;
12812 }
12813 }
12814 } else {
12815 ret = NULL;
12816 }
12817 xmlFreeDoc(ctxt->myDoc);
12818 ctxt->myDoc = NULL;
12819 }
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
12822
12823 return(ret);
12824 }
12825
12826
12827 /**
12828 * xmlParseDTD:
12829 * @ExternalID: a NAME* containing the External ID of the DTD
12830 * @SystemID: a NAME* containing the URL to the DTD
12831 *
12832 * Load and parse an external subset.
12833 *
12834 * Returns the resulting xmlDtdPtr or NULL in case of error.
12835 */
12836
12837 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12838 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12839 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12840 }
12841 #endif /* LIBXML_VALID_ENABLED */
12842
12843 /************************************************************************
12844 * *
12845 * Front ends when parsing an Entity *
12846 * *
12847 ************************************************************************/
12848
12849 /**
12850 * xmlParseCtxtExternalEntity:
12851 * @ctx: the existing parsing context
12852 * @URL: the URL for the entity to load
12853 * @ID: the System ID for the entity to load
12854 * @lst: the return value for the set of parsed nodes
12855 *
12856 * Parse an external general entity within an existing parsing context
12857 * An external general parsed entity is well-formed if it matches the
12858 * production labeled extParsedEnt.
12859 *
12860 * [78] extParsedEnt ::= TextDecl? content
12861 *
12862 * Returns 0 if the entity is well formed, -1 in case of args problem and
12863 * the parser error code otherwise
12864 */
12865
12866 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12867 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12868 const xmlChar *ID, xmlNodePtr *lst) {
12869 xmlParserCtxtPtr ctxt;
12870 xmlDocPtr newDoc;
12871 xmlNodePtr newRoot;
12872 xmlSAXHandlerPtr oldsax = NULL;
12873 int ret = 0;
12874 xmlChar start[4];
12875 xmlCharEncoding enc;
12876
12877 if (ctx == NULL) return(-1);
12878
12879 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880 (ctx->depth > 1024)) {
12881 return(XML_ERR_ENTITY_LOOP);
12882 }
12883
12884 if (lst != NULL)
12885 *lst = NULL;
12886 if ((URL == NULL) && (ID == NULL))
12887 return(-1);
12888 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12889 return(-1);
12890
12891 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12892 if (ctxt == NULL) {
12893 return(-1);
12894 }
12895
12896 oldsax = ctxt->sax;
12897 ctxt->sax = ctx->sax;
12898 xmlDetectSAX2(ctxt);
12899 newDoc = xmlNewDoc(BAD_CAST "1.0");
12900 if (newDoc == NULL) {
12901 xmlFreeParserCtxt(ctxt);
12902 return(-1);
12903 }
12904 newDoc->properties = XML_DOC_INTERNAL;
12905 if (ctx->myDoc->dict) {
12906 newDoc->dict = ctx->myDoc->dict;
12907 xmlDictReference(newDoc->dict);
12908 }
12909 if (ctx->myDoc != NULL) {
12910 newDoc->intSubset = ctx->myDoc->intSubset;
12911 newDoc->extSubset = ctx->myDoc->extSubset;
12912 }
12913 if (ctx->myDoc->URL != NULL) {
12914 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12915 }
12916 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917 if (newRoot == NULL) {
12918 ctxt->sax = oldsax;
12919 xmlFreeParserCtxt(ctxt);
12920 newDoc->intSubset = NULL;
12921 newDoc->extSubset = NULL;
12922 xmlFreeDoc(newDoc);
12923 return(-1);
12924 }
12925 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12926 nodePush(ctxt, newDoc->children);
12927 if (ctx->myDoc == NULL) {
12928 ctxt->myDoc = newDoc;
12929 } else {
12930 ctxt->myDoc = ctx->myDoc;
12931 newDoc->children->doc = ctx->myDoc;
12932 }
12933
12934 /*
12935 * Get the 4 first bytes and decode the charset
12936 * if enc != XML_CHAR_ENCODING_NONE
12937 * plug some encoding conversion routines.
12938 */
12939 GROW
12940 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 start[0] = RAW;
12942 start[1] = NXT(1);
12943 start[2] = NXT(2);
12944 start[3] = NXT(3);
12945 enc = xmlDetectCharEncoding(start, 4);
12946 if (enc != XML_CHAR_ENCODING_NONE) {
12947 xmlSwitchEncoding(ctxt, enc);
12948 }
12949 }
12950
12951 /*
12952 * Parse a possible text declaration first
12953 */
12954 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12955 xmlParseTextDecl(ctxt);
12956 /*
12957 * An XML-1.0 document can't reference an entity not XML-1.0
12958 */
12959 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12961 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12962 "Version mismatch between document and entity\n");
12963 }
12964 }
12965
12966 /*
12967 * If the user provided its own SAX callbacks then reuse the
12968 * useData callback field, otherwise the expected setup in a
12969 * DOM builder is to have userData == ctxt
12970 */
12971 if (ctx->userData == ctx)
12972 ctxt->userData = ctxt;
12973 else
12974 ctxt->userData = ctx->userData;
12975
12976 /*
12977 * Doing validity checking on chunk doesn't make sense
12978 */
12979 ctxt->instate = XML_PARSER_CONTENT;
12980 ctxt->validate = ctx->validate;
12981 ctxt->valid = ctx->valid;
12982 ctxt->loadsubset = ctx->loadsubset;
12983 ctxt->depth = ctx->depth + 1;
12984 ctxt->replaceEntities = ctx->replaceEntities;
12985 if (ctxt->validate) {
12986 ctxt->vctxt.error = ctx->vctxt.error;
12987 ctxt->vctxt.warning = ctx->vctxt.warning;
12988 } else {
12989 ctxt->vctxt.error = NULL;
12990 ctxt->vctxt.warning = NULL;
12991 }
12992 ctxt->vctxt.nodeTab = NULL;
12993 ctxt->vctxt.nodeNr = 0;
12994 ctxt->vctxt.nodeMax = 0;
12995 ctxt->vctxt.node = NULL;
12996 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997 ctxt->dict = ctx->dict;
12998 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13001 ctxt->dictNames = ctx->dictNames;
13002 ctxt->attsDefault = ctx->attsDefault;
13003 ctxt->attsSpecial = ctx->attsSpecial;
13004 ctxt->linenumbers = ctx->linenumbers;
13005
13006 xmlParseContent(ctxt);
13007
13008 ctx->validate = ctxt->validate;
13009 ctx->valid = ctxt->valid;
13010 if ((RAW == '<') && (NXT(1) == '/')) {
13011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13012 } else if (RAW != 0) {
13013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13014 }
13015 if (ctxt->node != newDoc->children) {
13016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13017 }
13018
13019 if (!ctxt->wellFormed) {
13020 if (ctxt->errNo == 0)
13021 ret = 1;
13022 else
13023 ret = ctxt->errNo;
13024 } else {
13025 if (lst != NULL) {
13026 xmlNodePtr cur;
13027
            /*
             * Return the newly created nodeset after unlinking it from
             * the pseudo parent.
             */
13032 cur = newDoc->children->children;
13033 *lst = cur;
13034 while (cur != NULL) {
13035 cur->parent = NULL;
13036 cur = cur->next;
13037 }
13038 newDoc->children->children = NULL;
13039 }
13040 ret = 0;
13041 }
13042 ctxt->sax = oldsax;
13043 ctxt->dict = NULL;
13044 ctxt->attsDefault = NULL;
13045 ctxt->attsSpecial = NULL;
13046 xmlFreeParserCtxt(ctxt);
13047 newDoc->intSubset = NULL;
13048 newDoc->extSubset = NULL;
13049 xmlFreeDoc(newDoc);
13050
13051 return(ret);
13052 }
13053
13054 /**
13055 * xmlParseExternalEntityPrivate:
13056 * @doc: the document the chunk pertains to
13057 * @oldctxt: the previous parser context if available
13058 * @sax: the SAX handler bloc (possibly NULL)
13059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13060 * @depth: Used for loop detection, use 0
13061 * @URL: the URL for the entity to load
13062 * @ID: the System ID for the entity to load
13063 * @list: the return value for the set of parsed nodes
13064 *
13065 * Private version of xmlParseExternalEntity()
13066 *
13067 * Returns 0 if the entity is well formed, -1 in case of args problem and
13068 * the parser error code otherwise
13069 */
13070
13071 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)13072 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073 xmlSAXHandlerPtr sax,
13074 void *user_data, int depth, const xmlChar *URL,
13075 const xmlChar *ID, xmlNodePtr *list) {
13076 xmlParserCtxtPtr ctxt;
13077 xmlDocPtr newDoc;
13078 xmlNodePtr newRoot;
13079 xmlSAXHandlerPtr oldsax = NULL;
13080 xmlParserErrors ret = XML_ERR_OK;
13081 xmlChar start[4];
13082 xmlCharEncoding enc;
13083
13084 if (((depth > 40) &&
13085 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13086 (depth > 1024)) {
13087 return(XML_ERR_ENTITY_LOOP);
13088 }
13089
13090 if (list != NULL)
13091 *list = NULL;
13092 if ((URL == NULL) && (ID == NULL))
13093 return(XML_ERR_INTERNAL_ERROR);
13094 if (doc == NULL)
13095 return(XML_ERR_INTERNAL_ERROR);
13096
13097
13098 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13099 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13100 ctxt->userData = ctxt;
13101 if (oldctxt != NULL) {
13102 ctxt->_private = oldctxt->_private;
13103 ctxt->loadsubset = oldctxt->loadsubset;
13104 ctxt->validate = oldctxt->validate;
13105 ctxt->external = oldctxt->external;
13106 ctxt->record_info = oldctxt->record_info;
13107 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108 ctxt->node_seq.length = oldctxt->node_seq.length;
13109 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13110 } else {
13111 /*
13112 * Doing validity checking on chunk without context
13113 * doesn't make sense
13114 */
13115 ctxt->_private = NULL;
13116 ctxt->validate = 0;
13117 ctxt->external = 2;
13118 ctxt->loadsubset = 0;
13119 }
13120 if (sax != NULL) {
13121 oldsax = ctxt->sax;
13122 ctxt->sax = sax;
13123 if (user_data != NULL)
13124 ctxt->userData = user_data;
13125 }
13126 xmlDetectSAX2(ctxt);
13127 newDoc = xmlNewDoc(BAD_CAST "1.0");
13128 if (newDoc == NULL) {
13129 ctxt->node_seq.maximum = 0;
13130 ctxt->node_seq.length = 0;
13131 ctxt->node_seq.buffer = NULL;
13132 xmlFreeParserCtxt(ctxt);
13133 return(XML_ERR_INTERNAL_ERROR);
13134 }
13135 newDoc->properties = XML_DOC_INTERNAL;
13136 newDoc->intSubset = doc->intSubset;
13137 newDoc->extSubset = doc->extSubset;
13138 newDoc->dict = doc->dict;
13139 xmlDictReference(newDoc->dict);
13140
13141 if (doc->URL != NULL) {
13142 newDoc->URL = xmlStrdup(doc->URL);
13143 }
13144 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145 if (newRoot == NULL) {
13146 if (sax != NULL)
13147 ctxt->sax = oldsax;
13148 ctxt->node_seq.maximum = 0;
13149 ctxt->node_seq.length = 0;
13150 ctxt->node_seq.buffer = NULL;
13151 xmlFreeParserCtxt(ctxt);
13152 newDoc->intSubset = NULL;
13153 newDoc->extSubset = NULL;
13154 xmlFreeDoc(newDoc);
13155 return(XML_ERR_INTERNAL_ERROR);
13156 }
13157 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13158 nodePush(ctxt, newDoc->children);
13159 ctxt->myDoc = doc;
13160 newRoot->doc = doc;
13161
13162 /*
13163 * Get the 4 first bytes and decode the charset
13164 * if enc != XML_CHAR_ENCODING_NONE
13165 * plug some encoding conversion routines.
13166 */
13167 GROW;
13168 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 start[0] = RAW;
13170 start[1] = NXT(1);
13171 start[2] = NXT(2);
13172 start[3] = NXT(3);
13173 enc = xmlDetectCharEncoding(start, 4);
13174 if (enc != XML_CHAR_ENCODING_NONE) {
13175 xmlSwitchEncoding(ctxt, enc);
13176 }
13177 }
13178
13179 /*
13180 * Parse a possible text declaration first
13181 */
13182 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13183 xmlParseTextDecl(ctxt);
13184 }
13185
13186 ctxt->instate = XML_PARSER_CONTENT;
13187 ctxt->depth = depth;
13188
13189 xmlParseContent(ctxt);
13190
13191 if ((RAW == '<') && (NXT(1) == '/')) {
13192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13193 } else if (RAW != 0) {
13194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13195 }
13196 if (ctxt->node != newDoc->children) {
13197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13198 }
13199
13200 if (!ctxt->wellFormed) {
13201 if (ctxt->errNo == 0)
13202 ret = XML_ERR_INTERNAL_ERROR;
13203 else
13204 ret = (xmlParserErrors)ctxt->errNo;
13205 } else {
13206 if (list != NULL) {
13207 xmlNodePtr cur;
13208
13209 /*
13210 * Return the newly created nodeset after unlinking it from
13211 * they pseudo parent.
13212 */
13213 cur = newDoc->children->children;
13214 *list = cur;
13215 while (cur != NULL) {
13216 cur->parent = NULL;
13217 cur = cur->next;
13218 }
13219 newDoc->children->children = NULL;
13220 }
13221 ret = XML_ERR_OK;
13222 }
13223
13224 /*
13225 * Record in the parent context the number of entities replacement
13226 * done when parsing that reference.
13227 */
13228 if (oldctxt != NULL)
13229 oldctxt->nbentities += ctxt->nbentities;
13230
13231 /*
13232 * Also record the size of the entity parsed
13233 */
13234 if (ctxt->input != NULL && oldctxt != NULL) {
13235 oldctxt->sizeentities += ctxt->input->consumed;
13236 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13237 }
13238 /*
13239 * And record the last error if any
13240 */
13241 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13242 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13243
13244 if (sax != NULL)
13245 ctxt->sax = oldsax;
13246 if (oldctxt != NULL) {
13247 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13248 oldctxt->node_seq.length = ctxt->node_seq.length;
13249 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13250 }
13251 ctxt->node_seq.maximum = 0;
13252 ctxt->node_seq.length = 0;
13253 ctxt->node_seq.buffer = NULL;
13254 xmlFreeParserCtxt(ctxt);
13255 newDoc->intSubset = NULL;
13256 newDoc->extSubset = NULL;
13257 xmlFreeDoc(newDoc);
13258
13259 return(ret);
13260 }
13261
13262 #ifdef LIBXML_SAX1_ENABLED
13263 /**
13264 * xmlParseExternalEntity:
13265 * @doc: the document the chunk pertains to
13266 * @sax: the SAX handler bloc (possibly NULL)
13267 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13268 * @depth: Used for loop detection, use 0
13269 * @URL: the URL for the entity to load
13270 * @ID: the System ID for the entity to load
13271 * @lst: the return value for the set of parsed nodes
13272 *
13273 * Parse an external general entity
13274 * An external general parsed entity is well-formed if it matches the
13275 * production labeled extParsedEnt.
13276 *
13277 * [78] extParsedEnt ::= TextDecl? content
13278 *
13279 * Returns 0 if the entity is well formed, -1 in case of args problem and
13280 * the parser error code otherwise
13281 */
13282
13283 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)13284 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13285 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13286 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13287 ID, lst));
13288 }
13289
13290 /**
13291 * xmlParseBalancedChunkMemory:
13292 * @doc: the document the chunk pertains to
13293 * @sax: the SAX handler bloc (possibly NULL)
13294 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13295 * @depth: Used for loop detection, use 0
13296 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13297 * @lst: the return value for the set of parsed nodes
13298 *
13299 * Parse a well-balanced chunk of an XML document
13300 * called by the parser
13301 * The allowed sequence for the Well Balanced Chunk is the one defined by
13302 * the content production in the XML grammar:
13303 *
13304 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13305 *
13306 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13307 * the parser error code otherwise
13308 */
13309
13310 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)13311 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13312 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13313 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13314 depth, string, lst, 0 );
13315 }
13316 #endif /* LIBXML_SAX1_ENABLED */
13317
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed with a temporary context that borrows the dictionary,
 * SAX handler, default/special attribute tables and (when present) the
 * document of @oldctxt. The parsed nodes hang under a temporary
 * "pseudoroot" element which is freed before returning.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, XML_ERR_ENTITY_LOOP if
 *         the nesting depth of @oldctxt is too large, XML_ERR_INTERNAL_ERROR
 *         if @string is NULL or an allocation fails,
 *         XML_WAR_UNDECLARED_ENTITY if the memory parser context could not
 *         be created, and the parser error code otherwise
 *
 * Nodes are handed back through @lst only when the chunk parsed without
 * error; this function has no recovery mode.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /* nesting limit: 40 levels, or 1024 with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Swap the context's own dictionary for the parent's one. No reference
     * is taken here, which is why ctxt->dict is reset to NULL before every
     * xmlFreeParserCtxt() call below.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* borrow the parent's SAX handler; oldsax is put back before freeing */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document yet: build a temporary one sharing the dictionary */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Reuse the parent's document; remember its child list so that it
         * can be restored once the pseudo root has been removed.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* temporarily make the pseudo root the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    /* borrowed tables: reset to NULL before the context is freed */
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back on the pseudo root */
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /* validate each returned element against the parent's DTD */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /* drop the pseudo root and restore the original child list */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* give back everything that was borrowed before freeing the context */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}
13512
13513 /**
13514 * xmlParseInNodeContext:
13515 * @node: the context node
13516 * @data: the input string
13517 * @datalen: the input string length in bytes
13518 * @options: a combination of xmlParserOption
13519 * @lst: the return value for the set of parsed nodes
13520 *
13521 * Parse a well-balanced chunk of an XML document
13522 * within the context (DTD, namespaces, etc ...) of the given node.
13523 *
13524 * The allowed sequence for the data is a Well Balanced Chunk defined by
13525 * the content production in the XML grammar:
13526 *
13527 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13528 *
13529 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13530 * error code otherwise
13531 */
13532 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13533 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13534 int options, xmlNodePtr *lst) {
13535 #ifdef SAX2
13536 xmlParserCtxtPtr ctxt;
13537 xmlDocPtr doc = NULL;
13538 xmlNodePtr fake, cur;
13539 int nsnr = 0;
13540
13541 xmlParserErrors ret = XML_ERR_OK;
13542
13543 /*
13544 * check all input parameters, grab the document
13545 */
13546 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13547 return(XML_ERR_INTERNAL_ERROR);
13548 switch (node->type) {
13549 case XML_ELEMENT_NODE:
13550 case XML_ATTRIBUTE_NODE:
13551 case XML_TEXT_NODE:
13552 case XML_CDATA_SECTION_NODE:
13553 case XML_ENTITY_REF_NODE:
13554 case XML_PI_NODE:
13555 case XML_COMMENT_NODE:
13556 case XML_DOCUMENT_NODE:
13557 case XML_HTML_DOCUMENT_NODE:
13558 break;
13559 default:
13560 return(XML_ERR_INTERNAL_ERROR);
13561
13562 }
13563 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13564 (node->type != XML_DOCUMENT_NODE) &&
13565 (node->type != XML_HTML_DOCUMENT_NODE))
13566 node = node->parent;
13567 if (node == NULL)
13568 return(XML_ERR_INTERNAL_ERROR);
13569 if (node->type == XML_ELEMENT_NODE)
13570 doc = node->doc;
13571 else
13572 doc = (xmlDocPtr) node;
13573 if (doc == NULL)
13574 return(XML_ERR_INTERNAL_ERROR);
13575
13576 /*
13577 * allocate a context and set-up everything not related to the
13578 * node position in the tree
13579 */
13580 if (doc->type == XML_DOCUMENT_NODE)
13581 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13582 #ifdef LIBXML_HTML_ENABLED
13583 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13584 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13585 /*
13586 * When parsing in context, it makes no sense to add implied
13587 * elements like html/body/etc...
13588 */
13589 options |= HTML_PARSE_NOIMPLIED;
13590 }
13591 #endif
13592 else
13593 return(XML_ERR_INTERNAL_ERROR);
13594
13595 if (ctxt == NULL)
13596 return(XML_ERR_NO_MEMORY);
13597
13598 /*
13599 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13600 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13601 * we must wait until the last moment to free the original one.
13602 */
13603 if (doc->dict != NULL) {
13604 if (ctxt->dict != NULL)
13605 xmlDictFree(ctxt->dict);
13606 ctxt->dict = doc->dict;
13607 } else
13608 options |= XML_PARSE_NODICT;
13609
13610 if (doc->encoding != NULL) {
13611 xmlCharEncodingHandlerPtr hdlr;
13612
13613 if (ctxt->encoding != NULL)
13614 xmlFree((xmlChar *) ctxt->encoding);
13615 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13616
13617 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13618 if (hdlr != NULL) {
13619 xmlSwitchToEncoding(ctxt, hdlr);
13620 } else {
13621 return(XML_ERR_UNSUPPORTED_ENCODING);
13622 }
13623 }
13624
13625 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13626 xmlDetectSAX2(ctxt);
13627 ctxt->myDoc = doc;
13628 /* parsing in context, i.e. as within existing content */
13629 ctxt->input_id = 2;
13630 ctxt->instate = XML_PARSER_CONTENT;
13631
13632 fake = xmlNewComment(NULL);
13633 if (fake == NULL) {
13634 xmlFreeParserCtxt(ctxt);
13635 return(XML_ERR_NO_MEMORY);
13636 }
13637 xmlAddChild(node, fake);
13638
13639 if (node->type == XML_ELEMENT_NODE) {
13640 nodePush(ctxt, node);
13641 /*
13642 * initialize the SAX2 namespaces stack
13643 */
13644 cur = node;
13645 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13646 xmlNsPtr ns = cur->nsDef;
13647 const xmlChar *iprefix, *ihref;
13648
13649 while (ns != NULL) {
13650 if (ctxt->dict) {
13651 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13652 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13653 } else {
13654 iprefix = ns->prefix;
13655 ihref = ns->href;
13656 }
13657
13658 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13659 nsPush(ctxt, iprefix, ihref);
13660 nsnr++;
13661 }
13662 ns = ns->next;
13663 }
13664 cur = cur->parent;
13665 }
13666 }
13667
13668 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13669 /*
13670 * ID/IDREF registration will be done in xmlValidateElement below
13671 */
13672 ctxt->loadsubset |= XML_SKIP_IDS;
13673 }
13674
13675 #ifdef LIBXML_HTML_ENABLED
13676 if (doc->type == XML_HTML_DOCUMENT_NODE)
13677 __htmlParseContent(ctxt);
13678 else
13679 #endif
13680 xmlParseContent(ctxt);
13681
13682 nsPop(ctxt, nsnr);
13683 if ((RAW == '<') && (NXT(1) == '/')) {
13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 } else if (RAW != 0) {
13686 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13687 }
13688 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13689 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13690 ctxt->wellFormed = 0;
13691 }
13692
13693 if (!ctxt->wellFormed) {
13694 if (ctxt->errNo == 0)
13695 ret = XML_ERR_INTERNAL_ERROR;
13696 else
13697 ret = (xmlParserErrors)ctxt->errNo;
13698 } else {
13699 ret = XML_ERR_OK;
13700 }
13701
13702 /*
13703 * Return the newly created nodeset after unlinking it from
13704 * the pseudo sibling.
13705 */
13706
13707 cur = fake->next;
13708 fake->next = NULL;
13709 node->last = fake;
13710
13711 if (cur != NULL) {
13712 cur->prev = NULL;
13713 }
13714
13715 *lst = cur;
13716
13717 while (cur != NULL) {
13718 cur->parent = NULL;
13719 cur = cur->next;
13720 }
13721
13722 xmlUnlinkNode(fake);
13723 xmlFreeNode(fake);
13724
13725
13726 if (ret != XML_ERR_OK) {
13727 xmlFreeNodeList(*lst);
13728 *lst = NULL;
13729 }
13730
13731 if (doc->dict != NULL)
13732 ctxt->dict = NULL;
13733 xmlFreeParserCtxt(ctxt);
13734
13735 return(ret);
13736 #else /* !SAX2 */
13737 return(XML_ERR_INTERNAL_ERROR);
13738 #endif
13739 }
13740
13741 #ifdef LIBXML_SAX1_ENABLED
13742 /**
13743 * xmlParseBalancedChunkMemoryRecover:
13744 * @doc: the document the chunk pertains to
13745 * @sax: the SAX handler bloc (possibly NULL)
13746 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13747 * @depth: Used for loop detection, use 0
13748 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13749 * @lst: the return value for the set of parsed nodes
13750 * @recover: return nodes even if the data is broken (use 0)
13751 *
13752 *
13753 * Parse a well-balanced chunk of an XML document
13754 * called by the parser
13755 * The allowed sequence for the Well Balanced Chunk is the one defined by
13756 * the content production in the XML grammar:
13757 *
13758 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13759 *
13760 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13761 * the parser error code otherwise
13762 *
13763 * In case recover is set to 1, the nodelist will not be empty even if
13764 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13765 * some extent.
13766 */
13767 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13768 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13769 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13770 int recover) {
13771 xmlParserCtxtPtr ctxt;
13772 xmlDocPtr newDoc;
13773 xmlSAXHandlerPtr oldsax = NULL;
13774 xmlNodePtr content, newRoot;
13775 int size;
13776 int ret = 0;
13777
13778 if (depth > 40) {
13779 return(XML_ERR_ENTITY_LOOP);
13780 }
13781
13782
13783 if (lst != NULL)
13784 *lst = NULL;
13785 if (string == NULL)
13786 return(-1);
13787
13788 size = xmlStrlen(string);
13789
13790 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13791 if (ctxt == NULL) return(-1);
13792 ctxt->userData = ctxt;
13793 if (sax != NULL) {
13794 oldsax = ctxt->sax;
13795 ctxt->sax = sax;
13796 if (user_data != NULL)
13797 ctxt->userData = user_data;
13798 }
13799 newDoc = xmlNewDoc(BAD_CAST "1.0");
13800 if (newDoc == NULL) {
13801 xmlFreeParserCtxt(ctxt);
13802 return(-1);
13803 }
13804 newDoc->properties = XML_DOC_INTERNAL;
13805 if ((doc != NULL) && (doc->dict != NULL)) {
13806 xmlDictFree(ctxt->dict);
13807 ctxt->dict = doc->dict;
13808 xmlDictReference(ctxt->dict);
13809 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13810 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13811 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13812 ctxt->dictNames = 1;
13813 } else {
13814 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13815 }
13816 if (doc != NULL) {
13817 newDoc->intSubset = doc->intSubset;
13818 newDoc->extSubset = doc->extSubset;
13819 }
13820 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13821 if (newRoot == NULL) {
13822 if (sax != NULL)
13823 ctxt->sax = oldsax;
13824 xmlFreeParserCtxt(ctxt);
13825 newDoc->intSubset = NULL;
13826 newDoc->extSubset = NULL;
13827 xmlFreeDoc(newDoc);
13828 return(-1);
13829 }
13830 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13831 nodePush(ctxt, newRoot);
13832 if (doc == NULL) {
13833 ctxt->myDoc = newDoc;
13834 } else {
13835 ctxt->myDoc = newDoc;
13836 newDoc->children->doc = doc;
13837 /* Ensure that doc has XML spec namespace */
13838 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13839 newDoc->oldNs = doc->oldNs;
13840 }
13841 ctxt->instate = XML_PARSER_CONTENT;
13842 ctxt->input_id = 2;
13843 ctxt->depth = depth;
13844
13845 /*
13846 * Doing validity checking on chunk doesn't make sense
13847 */
13848 ctxt->validate = 0;
13849 ctxt->loadsubset = 0;
13850 xmlDetectSAX2(ctxt);
13851
13852 if ( doc != NULL ){
13853 content = doc->children;
13854 doc->children = NULL;
13855 xmlParseContent(ctxt);
13856 doc->children = content;
13857 }
13858 else {
13859 xmlParseContent(ctxt);
13860 }
13861 if ((RAW == '<') && (NXT(1) == '/')) {
13862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13863 } else if (RAW != 0) {
13864 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13865 }
13866 if (ctxt->node != newDoc->children) {
13867 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13868 }
13869
13870 if (!ctxt->wellFormed) {
13871 if (ctxt->errNo == 0)
13872 ret = 1;
13873 else
13874 ret = ctxt->errNo;
13875 } else {
13876 ret = 0;
13877 }
13878
13879 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13880 xmlNodePtr cur;
13881
13882 /*
13883 * Return the newly created nodeset after unlinking it from
13884 * they pseudo parent.
13885 */
13886 cur = newDoc->children->children;
13887 *lst = cur;
13888 while (cur != NULL) {
13889 xmlSetTreeDoc(cur, doc);
13890 cur->parent = NULL;
13891 cur = cur->next;
13892 }
13893 newDoc->children->children = NULL;
13894 }
13895
13896 if (sax != NULL)
13897 ctxt->sax = oldsax;
13898 xmlFreeParserCtxt(ctxt);
13899 newDoc->intSubset = NULL;
13900 newDoc->extSubset = NULL;
13901 newDoc->oldNs = NULL;
13902 xmlFreeDoc(newDoc);
13903
13904 return(ret);
13905 }
13906
13907 /**
13908 * xmlSAXParseEntity:
13909 * @sax: the SAX handler block
13910 * @filename: the filename
13911 *
13912 * parse an XML external entity out of context and build a tree.
13913 * It use the given SAX function block to handle the parsing callback.
13914 * If sax is NULL, fallback to the default DOM tree building routines.
13915 *
13916 * [78] extParsedEnt ::= TextDecl? content
13917 *
13918 * This correspond to a "Well Balanced" chunk
13919 *
13920 * Returns the resulting document tree
13921 */
13922
13923 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13924 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13925 xmlDocPtr ret;
13926 xmlParserCtxtPtr ctxt;
13927
13928 ctxt = xmlCreateFileParserCtxt(filename);
13929 if (ctxt == NULL) {
13930 return(NULL);
13931 }
13932 if (sax != NULL) {
13933 if (ctxt->sax != NULL)
13934 xmlFree(ctxt->sax);
13935 ctxt->sax = sax;
13936 ctxt->userData = NULL;
13937 }
13938
13939 xmlParseExtParsedEnt(ctxt);
13940
13941 if (ctxt->wellFormed)
13942 ret = ctxt->myDoc;
13943 else {
13944 ret = NULL;
13945 xmlFreeDoc(ctxt->myDoc);
13946 ctxt->myDoc = NULL;
13947 }
13948 if (sax != NULL)
13949 ctxt->sax = NULL;
13950 xmlFreeParserCtxt(ctxt);
13951
13952 return(ret);
13953 }
13954
13955 /**
13956 * xmlParseEntity:
13957 * @filename: the filename
13958 *
13959 * parse an XML external entity out of context and build a tree.
13960 *
13961 * [78] extParsedEnt ::= TextDecl? content
13962 *
13963 * This correspond to a "Well Balanced" chunk
13964 *
13965 * Returns the resulting document tree
13966 */
13967
13968 xmlDocPtr
xmlParseEntity(const char * filename)13969 xmlParseEntity(const char *filename) {
13970 return(xmlSAXParseEntity(NULL, filename));
13971 }
13972 #endif /* LIBXML_SAX1_ENABLED */
13973
13974 /**
13975 * xmlCreateEntityParserCtxtInternal:
13976 * @URL: the entity URL
13977 * @ID: the entity PUBLIC ID
13978 * @base: a possible base for the target URI
13979 * @pctx: parser context used to set options on new context
13980 *
13981 * Create a parser context for an external entity
13982 * Automatic support for ZLIB/Compress compressed document is provided
13983 * by default if found at compile-time.
13984 *
13985 * Returns the new parser context or NULL
13986 */
13987 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13988 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13989 const xmlChar *base, xmlParserCtxtPtr pctx) {
13990 xmlParserCtxtPtr ctxt;
13991 xmlParserInputPtr inputStream;
13992 char *directory = NULL;
13993 xmlChar *uri;
13994
13995 ctxt = xmlNewParserCtxt();
13996 if (ctxt == NULL) {
13997 return(NULL);
13998 }
13999
14000 if (pctx != NULL) {
14001 ctxt->options = pctx->options;
14002 ctxt->_private = pctx->_private;
14003 /*
14004 * this is a subparser of pctx, so the input_id should be
14005 * incremented to distinguish from main entity
14006 */
14007 ctxt->input_id = pctx->input_id + 1;
14008 }
14009
14010 uri = xmlBuildURI(URL, base);
14011
14012 if (uri == NULL) {
14013 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14014 if (inputStream == NULL) {
14015 xmlFreeParserCtxt(ctxt);
14016 return(NULL);
14017 }
14018
14019 inputPush(ctxt, inputStream);
14020
14021 if ((ctxt->directory == NULL) && (directory == NULL))
14022 directory = xmlParserGetDirectory((char *)URL);
14023 if ((ctxt->directory == NULL) && (directory != NULL))
14024 ctxt->directory = directory;
14025 } else {
14026 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14027 if (inputStream == NULL) {
14028 xmlFree(uri);
14029 xmlFreeParserCtxt(ctxt);
14030 return(NULL);
14031 }
14032
14033 inputPush(ctxt, inputStream);
14034
14035 if ((ctxt->directory == NULL) && (directory == NULL))
14036 directory = xmlParserGetDirectory((char *)uri);
14037 if ((ctxt->directory == NULL) && (directory != NULL))
14038 ctxt->directory = directory;
14039 xmlFree(uri);
14040 }
14041 return(ctxt);
14042 }
14043
14044 /**
14045 * xmlCreateEntityParserCtxt:
14046 * @URL: the entity URL
14047 * @ID: the entity PUBLIC ID
14048 * @base: a possible base for the target URI
14049 *
14050 * Create a parser context for an external entity
14051 * Automatic support for ZLIB/Compress compressed document is provided
14052 * by default if found at compile-time.
14053 *
14054 * Returns the new parser context or NULL
14055 */
14056 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)14057 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14058 const xmlChar *base) {
14059 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14060
14061 }
14062
14063 /************************************************************************
14064 * *
14065 * Front ends when parsing from a file *
14066 * *
14067 ************************************************************************/
14068
14069 /**
14070 * xmlCreateURLParserCtxt:
14071 * @filename: the filename or URL
14072 * @options: a combination of xmlParserOption
14073 *
14074 * Create a parser context for a file or URL content.
14075 * Automatic support for ZLIB/Compress compressed document is provided
14076 * by default if found at compile-time and for file accesses
14077 *
14078 * Returns the new parser context or NULL
14079 */
14080 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)14081 xmlCreateURLParserCtxt(const char *filename, int options)
14082 {
14083 xmlParserCtxtPtr ctxt;
14084 xmlParserInputPtr inputStream;
14085 char *directory = NULL;
14086
14087 ctxt = xmlNewParserCtxt();
14088 if (ctxt == NULL) {
14089 xmlErrMemory(NULL, "cannot allocate parser context");
14090 return(NULL);
14091 }
14092
14093 if (options)
14094 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14095 ctxt->linenumbers = 1;
14096
14097 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14098 if (inputStream == NULL) {
14099 xmlFreeParserCtxt(ctxt);
14100 return(NULL);
14101 }
14102
14103 inputPush(ctxt, inputStream);
14104 if ((ctxt->directory == NULL) && (directory == NULL))
14105 directory = xmlParserGetDirectory(filename);
14106 if ((ctxt->directory == NULL) && (directory != NULL))
14107 ctxt->directory = directory;
14108
14109 return(ctxt);
14110 }
14111
14112 /**
14113 * xmlCreateFileParserCtxt:
14114 * @filename: the filename
14115 *
14116 * Create a parser context for a file content.
14117 * Automatic support for ZLIB/Compress compressed document is provided
14118 * by default if found at compile-time.
14119 *
14120 * Returns the new parser context or NULL
14121 */
14122 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)14123 xmlCreateFileParserCtxt(const char *filename)
14124 {
14125 return(xmlCreateURLParserCtxt(filename, 0));
14126 }
14127
14128 #ifdef LIBXML_SAX1_ENABLED
14129 /**
14130 * xmlSAXParseFileWithData:
14131 * @sax: the SAX handler block
14132 * @filename: the filename
14133 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14134 * documents
14135 * @data: the userdata
14136 *
14137 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14138 * compressed document is provided by default if found at compile-time.
14139 * It use the given SAX function block to handle the parsing callback.
14140 * If sax is NULL, fallback to the default DOM tree building routines.
14141 *
14142 * User data (void *) is stored within the parser context in the
14143 * context's _private member, so it is available nearly everywhere in libxml
14144 *
14145 * Returns the resulting document tree
14146 */
14147
14148 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)14149 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14150 int recovery, void *data) {
14151 xmlDocPtr ret;
14152 xmlParserCtxtPtr ctxt;
14153
14154 xmlInitParser();
14155
14156 ctxt = xmlCreateFileParserCtxt(filename);
14157 if (ctxt == NULL) {
14158 return(NULL);
14159 }
14160 if (sax != NULL) {
14161 if (ctxt->sax != NULL)
14162 xmlFree(ctxt->sax);
14163 ctxt->sax = sax;
14164 }
14165 xmlDetectSAX2(ctxt);
14166 if (data!=NULL) {
14167 ctxt->_private = data;
14168 }
14169
14170 if (ctxt->directory == NULL)
14171 ctxt->directory = xmlParserGetDirectory(filename);
14172
14173 ctxt->recovery = recovery;
14174
14175 xmlParseDocument(ctxt);
14176
14177 if ((ctxt->wellFormed) || recovery) {
14178 ret = ctxt->myDoc;
14179 if (ret != NULL) {
14180 if (ctxt->input->buf->compressed > 0)
14181 ret->compression = 9;
14182 else
14183 ret->compression = ctxt->input->buf->compressed;
14184 }
14185 }
14186 else {
14187 ret = NULL;
14188 xmlFreeDoc(ctxt->myDoc);
14189 ctxt->myDoc = NULL;
14190 }
14191 if (sax != NULL)
14192 ctxt->sax = NULL;
14193 xmlFreeParserCtxt(ctxt);
14194
14195 return(ret);
14196 }
14197
14198 /**
14199 * xmlSAXParseFile:
14200 * @sax: the SAX handler block
14201 * @filename: the filename
14202 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14203 * documents
14204 *
14205 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14206 * compressed document is provided by default if found at compile-time.
14207 * It use the given SAX function block to handle the parsing callback.
14208 * If sax is NULL, fallback to the default DOM tree building routines.
14209 *
14210 * Returns the resulting document tree
14211 */
14212
14213 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)14214 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14215 int recovery) {
14216 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14217 }
14218
14219 /**
14220 * xmlRecoverDoc:
14221 * @cur: a pointer to an array of xmlChar
14222 *
14223 * parse an XML in-memory document and build a tree.
14224 * In the case the document is not Well Formed, a attempt to build a
14225 * tree is tried anyway
14226 *
14227 * Returns the resulting document tree or NULL in case of failure
14228 */
14229
14230 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)14231 xmlRecoverDoc(const xmlChar *cur) {
14232 return(xmlSAXParseDoc(NULL, cur, 1));
14233 }
14234
14235 /**
14236 * xmlParseFile:
14237 * @filename: the filename
14238 *
14239 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14240 * compressed document is provided by default if found at compile-time.
14241 *
14242 * Returns the resulting document tree if the file was wellformed,
14243 * NULL otherwise.
14244 */
14245
14246 xmlDocPtr
xmlParseFile(const char * filename)14247 xmlParseFile(const char *filename) {
14248 return(xmlSAXParseFile(NULL, filename, 0));
14249 }
14250
14251 /**
14252 * xmlRecoverFile:
14253 * @filename: the filename
14254 *
14255 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14256 * compressed document is provided by default if found at compile-time.
14257 * In the case the document is not Well Formed, it attempts to build
14258 * a tree anyway
14259 *
14260 * Returns the resulting document tree or NULL in case of failure
14261 */
14262
14263 xmlDocPtr
xmlRecoverFile(const char * filename)14264 xmlRecoverFile(const char *filename) {
14265 return(xmlSAXParseFile(NULL, filename, 1));
14266 }
14267
14268
14269 /**
14270 * xmlSetupParserForBuffer:
14271 * @ctxt: an XML parser context
14272 * @buffer: a xmlChar * buffer
14273 * @filename: a file name
14274 *
14275 * Setup the parser context to parse a new buffer; Clears any prior
14276 * contents from the parser context. The buffer parameter must not be
14277 * NULL, but the filename parameter can be
14278 */
14279 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)14280 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14281 const char* filename)
14282 {
14283 xmlParserInputPtr input;
14284
14285 if ((ctxt == NULL) || (buffer == NULL))
14286 return;
14287
14288 input = xmlNewInputStream(ctxt);
14289 if (input == NULL) {
14290 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14291 xmlClearParserCtxt(ctxt);
14292 return;
14293 }
14294
14295 xmlClearParserCtxt(ctxt);
14296 if (filename != NULL)
14297 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14298 input->base = buffer;
14299 input->cur = buffer;
14300 input->end = &buffer[xmlStrlen(buffer)];
14301 inputPush(ctxt, input);
14302 }
14303
14304 /**
14305 * xmlSAXUserParseFile:
14306 * @sax: a SAX handler
14307 * @user_data: The user data returned on SAX callbacks
14308 * @filename: a file name
14309 *
14310 * parse an XML file and call the given SAX handler routines.
14311 * Automatic support for ZLIB/Compress compressed document is provided
14312 *
14313 * Returns 0 in case of success or a error number otherwise
14314 */
14315 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)14316 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14317 const char *filename) {
14318 int ret = 0;
14319 xmlParserCtxtPtr ctxt;
14320
14321 ctxt = xmlCreateFileParserCtxt(filename);
14322 if (ctxt == NULL) return -1;
14323 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14324 xmlFree(ctxt->sax);
14325 ctxt->sax = sax;
14326 xmlDetectSAX2(ctxt);
14327
14328 if (user_data != NULL)
14329 ctxt->userData = user_data;
14330
14331 xmlParseDocument(ctxt);
14332
14333 if (ctxt->wellFormed)
14334 ret = 0;
14335 else {
14336 if (ctxt->errNo != 0)
14337 ret = ctxt->errNo;
14338 else
14339 ret = -1;
14340 }
14341 if (sax != NULL)
14342 ctxt->sax = NULL;
14343 if (ctxt->myDoc != NULL) {
14344 xmlFreeDoc(ctxt->myDoc);
14345 ctxt->myDoc = NULL;
14346 }
14347 xmlFreeParserCtxt(ctxt);
14348
14349 return ret;
14350 }
14351 #endif /* LIBXML_SAX1_ENABLED */
14352
14353 /************************************************************************
14354 * *
14355 * Front ends when parsing from memory *
14356 * *
14357 ************************************************************************/
14358
14359 /**
14360 * xmlCreateMemoryParserCtxt:
14361 * @buffer: a pointer to a char array
14362 * @size: the size of the array
14363 *
14364 * Create a parser context for an XML in-memory document.
14365 *
14366 * Returns the new parser context or NULL
14367 */
14368 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14369 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14370 xmlParserCtxtPtr ctxt;
14371 xmlParserInputPtr input;
14372 xmlParserInputBufferPtr buf;
14373
14374 if (buffer == NULL)
14375 return(NULL);
14376 if (size <= 0)
14377 return(NULL);
14378
14379 ctxt = xmlNewParserCtxt();
14380 if (ctxt == NULL)
14381 return(NULL);
14382
14383 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14384 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14385 if (buf == NULL) {
14386 xmlFreeParserCtxt(ctxt);
14387 return(NULL);
14388 }
14389
14390 input = xmlNewInputStream(ctxt);
14391 if (input == NULL) {
14392 xmlFreeParserInputBuffer(buf);
14393 xmlFreeParserCtxt(ctxt);
14394 return(NULL);
14395 }
14396
14397 input->filename = NULL;
14398 input->buf = buf;
14399 xmlBufResetInput(input->buf->buffer, input);
14400
14401 inputPush(ctxt, input);
14402 return(ctxt);
14403 }
14404
14405 #ifdef LIBXML_SAX1_ENABLED
14406 /**
14407 * xmlSAXParseMemoryWithData:
14408 * @sax: the SAX handler block
14409 * @buffer: an pointer to a char array
14410 * @size: the size of the array
14411 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14412 * documents
14413 * @data: the userdata
14414 *
14415 * parse an XML in-memory block and use the given SAX function block
14416 * to handle the parsing callback. If sax is NULL, fallback to the default
14417 * DOM tree building routines.
14418 *
14419 * User data (void *) is stored within the parser context in the
14420 * context's _private member, so it is available nearly everywhere in libxml
14421 *
14422 * Returns the resulting document tree
14423 */
14424
14425 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14426 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14427 int size, int recovery, void *data) {
14428 xmlDocPtr ret;
14429 xmlParserCtxtPtr ctxt;
14430
14431 xmlInitParser();
14432
14433 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14434 if (ctxt == NULL) return(NULL);
14435 if (sax != NULL) {
14436 if (ctxt->sax != NULL)
14437 xmlFree(ctxt->sax);
14438 ctxt->sax = sax;
14439 }
14440 xmlDetectSAX2(ctxt);
14441 if (data!=NULL) {
14442 ctxt->_private=data;
14443 }
14444
14445 ctxt->recovery = recovery;
14446
14447 xmlParseDocument(ctxt);
14448
14449 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14450 else {
14451 ret = NULL;
14452 xmlFreeDoc(ctxt->myDoc);
14453 ctxt->myDoc = NULL;
14454 }
14455 if (sax != NULL)
14456 ctxt->sax = NULL;
14457 xmlFreeParserCtxt(ctxt);
14458
14459 return(ret);
14460 }
14461
14462 /**
14463 * xmlSAXParseMemory:
14464 * @sax: the SAX handler block
14465 * @buffer: an pointer to a char array
14466 * @size: the size of the array
14467 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14468 * documents
14469 *
14470 * parse an XML in-memory block and use the given SAX function block
14471 * to handle the parsing callback. If sax is NULL, fallback to the default
14472 * DOM tree building routines.
14473 *
14474 * Returns the resulting document tree
14475 */
14476 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14477 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14478 int size, int recovery) {
14479 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14480 }
14481
14482 /**
14483 * xmlParseMemory:
14484 * @buffer: an pointer to a char array
14485 * @size: the size of the array
14486 *
14487 * parse an XML in-memory block and build a tree.
14488 *
14489 * Returns the resulting document tree
14490 */
14491
xmlParseMemory(const char * buffer,int size)14492 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14493 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14494 }
14495
14496 /**
14497 * xmlRecoverMemory:
14498 * @buffer: an pointer to a char array
14499 * @size: the size of the array
14500 *
14501 * parse an XML in-memory block and build a tree.
14502 * In the case the document is not Well Formed, an attempt to
14503 * build a tree is tried anyway
14504 *
14505 * Returns the resulting document tree or NULL in case of error
14506 */
14507
xmlRecoverMemory(const char * buffer,int size)14508 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14509 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14510 }
14511
14512 /**
14513 * xmlSAXUserParseMemory:
14514 * @sax: a SAX handler
14515 * @user_data: The user data returned on SAX callbacks
14516 * @buffer: an in-memory XML document input
14517 * @size: the length of the XML document in bytes
14518 *
14519 * A better SAX parsing routine.
14520 * parse an XML in-memory buffer and call the given SAX handler routines.
14521 *
14522 * Returns 0 in case of success or a error number otherwise
14523 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14524 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14525 const char *buffer, int size) {
14526 int ret = 0;
14527 xmlParserCtxtPtr ctxt;
14528
14529 xmlInitParser();
14530
14531 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14532 if (ctxt == NULL) return -1;
14533 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14534 xmlFree(ctxt->sax);
14535 ctxt->sax = sax;
14536 xmlDetectSAX2(ctxt);
14537
14538 if (user_data != NULL)
14539 ctxt->userData = user_data;
14540
14541 xmlParseDocument(ctxt);
14542
14543 if (ctxt->wellFormed)
14544 ret = 0;
14545 else {
14546 if (ctxt->errNo != 0)
14547 ret = ctxt->errNo;
14548 else
14549 ret = -1;
14550 }
14551 if (sax != NULL)
14552 ctxt->sax = NULL;
14553 if (ctxt->myDoc != NULL) {
14554 xmlFreeDoc(ctxt->myDoc);
14555 ctxt->myDoc = NULL;
14556 }
14557 xmlFreeParserCtxt(ctxt);
14558
14559 return ret;
14560 }
14561 #endif /* LIBXML_SAX1_ENABLED */
14562
14563 /**
14564 * xmlCreateDocParserCtxt:
14565 * @cur: a pointer to an array of xmlChar
14566 *
14567 * Creates a parser context for an XML in-memory document.
14568 *
14569 * Returns the new parser context or NULL
14570 */
14571 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14572 xmlCreateDocParserCtxt(const xmlChar *cur) {
14573 int len;
14574
14575 if (cur == NULL)
14576 return(NULL);
14577 len = xmlStrlen(cur);
14578 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14579 }
14580
14581 #ifdef LIBXML_SAX1_ENABLED
14582 /**
14583 * xmlSAXParseDoc:
14584 * @sax: the SAX handler block
14585 * @cur: a pointer to an array of xmlChar
14586 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14587 * documents
14588 *
14589 * parse an XML in-memory document and build a tree.
14590 * It use the given SAX function block to handle the parsing callback.
14591 * If sax is NULL, fallback to the default DOM tree building routines.
14592 *
14593 * Returns the resulting document tree
14594 */
14595
14596 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14597 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14598 xmlDocPtr ret;
14599 xmlParserCtxtPtr ctxt;
14600 xmlSAXHandlerPtr oldsax = NULL;
14601
14602 if (cur == NULL) return(NULL);
14603
14604
14605 ctxt = xmlCreateDocParserCtxt(cur);
14606 if (ctxt == NULL) return(NULL);
14607 if (sax != NULL) {
14608 oldsax = ctxt->sax;
14609 ctxt->sax = sax;
14610 ctxt->userData = NULL;
14611 }
14612 xmlDetectSAX2(ctxt);
14613
14614 xmlParseDocument(ctxt);
14615 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14616 else {
14617 ret = NULL;
14618 xmlFreeDoc(ctxt->myDoc);
14619 ctxt->myDoc = NULL;
14620 }
14621 if (sax != NULL)
14622 ctxt->sax = oldsax;
14623 xmlFreeParserCtxt(ctxt);
14624
14625 return(ret);
14626 }
14627
14628 /**
14629 * xmlParseDoc:
14630 * @cur: a pointer to an array of xmlChar
14631 *
14632 * parse an XML in-memory document and build a tree.
14633 *
14634 * Returns the resulting document tree
14635 */
14636
14637 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14638 xmlParseDoc(const xmlChar *cur) {
14639 return(xmlSAXParseDoc(NULL, cur, 0));
14640 }
14641 #endif /* LIBXML_SAX1_ENABLED */
14642
14643 #ifdef LIBXML_LEGACY_ENABLED
14644 /************************************************************************
14645 * *
14646 * Specific function to keep track of entities references *
14647 * and used by the XSLT debugger *
14648 * *
14649 ************************************************************************/
14650
14651 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14652
14653 /**
14654 * xmlAddEntityReference:
14655 * @ent : A valid entity
14656 * @firstNode : A valid first node for children of entity
14657 * @lastNode : A valid last node of children entity
14658 *
14659 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14660 */
14661 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14662 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14663 xmlNodePtr lastNode)
14664 {
14665 if (xmlEntityRefFunc != NULL) {
14666 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14667 }
14668 }
14669
14670
14671 /**
14672 * xmlSetEntityReferenceFunc:
14673 * @func: A valid function
14674 *
14675 * Set the function to call call back when a xml reference has been made
14676 */
14677 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14678 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14679 {
14680 xmlEntityRefFunc = func;
14681 }
14682 #endif /* LIBXML_LEGACY_ENABLED */
14683
14684 /************************************************************************
14685 * *
14686 * Miscellaneous *
14687 * *
14688 ************************************************************************/
14689
14690 #ifdef LIBXML_XPATH_ENABLED
14691 #include <libxml/xpath.h>
14692 #endif
14693
/*
 * Declared locally; xmlInitParser() compares the current xmlGenericError
 * handler against this function.
 */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
14696
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The work is done only while xmlParserInitialized is 0; once the
 * sequence below has run the flag is set to 1 and further calls return
 * immediately. In thread-enabled builds the sequence runs between
 * __xmlGlobalInitMutexLock() and __xmlGlobalInitMutexUnlock(), with the
 * flag checked a second time after the lock has been taken.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once initialization has completed. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Re-check now that the lock is held. */
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Only (re)install the default handler when no custom one is set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library as initialized only after every step has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14740
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op when the library is not currently marked as
 * initialized; on completion the initialized flag is cleared again.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run (or was undone). */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to set everything up again. */
    xmlParserInitialized = 0;
}
14787
14788 /************************************************************************
14789 * *
14790 * New set (2.6.0) of simpler and more flexible APIs *
14791 * *
14792 ************************************************************************/
14793
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * A variable named "dict" must be visible where the macro is expanded.
 * @str is evaluated more than once, so it must not have side effects.
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: write DICT_FREE(x); with the trailing semicolon,
 * which also makes it safe as the body of an if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14805
14806 /**
14807 * xmlCtxtReset:
14808 * @ctxt: an XML parser context
14809 *
14810 * Reset a parser context
14811 */
14812 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14813 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14814 {
14815 xmlParserInputPtr input;
14816 xmlDictPtr dict;
14817
14818 if (ctxt == NULL)
14819 return;
14820
14821 dict = ctxt->dict;
14822
14823 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14824 xmlFreeInputStream(input);
14825 }
14826 ctxt->inputNr = 0;
14827 ctxt->input = NULL;
14828
14829 ctxt->spaceNr = 0;
14830 if (ctxt->spaceTab != NULL) {
14831 ctxt->spaceTab[0] = -1;
14832 ctxt->space = &ctxt->spaceTab[0];
14833 } else {
14834 ctxt->space = NULL;
14835 }
14836
14837
14838 ctxt->nodeNr = 0;
14839 ctxt->node = NULL;
14840
14841 ctxt->nameNr = 0;
14842 ctxt->name = NULL;
14843
14844 DICT_FREE(ctxt->version);
14845 ctxt->version = NULL;
14846 DICT_FREE(ctxt->encoding);
14847 ctxt->encoding = NULL;
14848 DICT_FREE(ctxt->directory);
14849 ctxt->directory = NULL;
14850 DICT_FREE(ctxt->extSubURI);
14851 ctxt->extSubURI = NULL;
14852 DICT_FREE(ctxt->extSubSystem);
14853 ctxt->extSubSystem = NULL;
14854 if (ctxt->myDoc != NULL)
14855 xmlFreeDoc(ctxt->myDoc);
14856 ctxt->myDoc = NULL;
14857
14858 ctxt->standalone = -1;
14859 ctxt->hasExternalSubset = 0;
14860 ctxt->hasPErefs = 0;
14861 ctxt->html = 0;
14862 ctxt->external = 0;
14863 ctxt->instate = XML_PARSER_START;
14864 ctxt->token = 0;
14865
14866 ctxt->wellFormed = 1;
14867 ctxt->nsWellFormed = 1;
14868 ctxt->disableSAX = 0;
14869 ctxt->valid = 1;
14870 #if 0
14871 ctxt->vctxt.userData = ctxt;
14872 ctxt->vctxt.error = xmlParserValidityError;
14873 ctxt->vctxt.warning = xmlParserValidityWarning;
14874 #endif
14875 ctxt->record_info = 0;
14876 ctxt->nbChars = 0;
14877 ctxt->checkIndex = 0;
14878 ctxt->inSubset = 0;
14879 ctxt->errNo = XML_ERR_OK;
14880 ctxt->depth = 0;
14881 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14882 ctxt->catalogs = NULL;
14883 ctxt->nbentities = 0;
14884 ctxt->sizeentities = 0;
14885 ctxt->sizeentcopy = 0;
14886 xmlInitNodeInfoSeq(&ctxt->node_seq);
14887
14888 if (ctxt->attsDefault != NULL) {
14889 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14890 ctxt->attsDefault = NULL;
14891 }
14892 if (ctxt->attsSpecial != NULL) {
14893 xmlHashFree(ctxt->attsSpecial, NULL);
14894 ctxt->attsSpecial = NULL;
14895 }
14896
14897 #ifdef LIBXML_CATALOG_ENABLED
14898 if (ctxt->catalogs != NULL)
14899 xmlCatalogFreeLocal(ctxt->catalogs);
14900 #endif
14901 if (ctxt->lastError.code != XML_ERR_OK)
14902 xmlResetError(&ctxt->lastError);
14903 }
14904
14905 /**
14906 * xmlCtxtResetPush:
14907 * @ctxt: an XML parser context
14908 * @chunk: a pointer to an array of chars
14909 * @size: number of chars in the array
14910 * @filename: an optional file name or URI
14911 * @encoding: the document encoding, or NULL
14912 *
14913 * Reset a push parser context
14914 *
14915 * Returns 0 in case of success and 1 in case of error
14916 */
14917 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14918 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14919 int size, const char *filename, const char *encoding)
14920 {
14921 xmlParserInputPtr inputStream;
14922 xmlParserInputBufferPtr buf;
14923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14924
14925 if (ctxt == NULL)
14926 return(1);
14927
14928 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14930
14931 buf = xmlAllocParserInputBuffer(enc);
14932 if (buf == NULL)
14933 return(1);
14934
14935 if (ctxt == NULL) {
14936 xmlFreeParserInputBuffer(buf);
14937 return(1);
14938 }
14939
14940 xmlCtxtReset(ctxt);
14941
14942 if (ctxt->pushTab == NULL) {
14943 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14944 sizeof(xmlChar *));
14945 if (ctxt->pushTab == NULL) {
14946 xmlErrMemory(ctxt, NULL);
14947 xmlFreeParserInputBuffer(buf);
14948 return(1);
14949 }
14950 }
14951
14952 if (filename == NULL) {
14953 ctxt->directory = NULL;
14954 } else {
14955 ctxt->directory = xmlParserGetDirectory(filename);
14956 }
14957
14958 inputStream = xmlNewInputStream(ctxt);
14959 if (inputStream == NULL) {
14960 xmlFreeParserInputBuffer(buf);
14961 return(1);
14962 }
14963
14964 if (filename == NULL)
14965 inputStream->filename = NULL;
14966 else
14967 inputStream->filename = (char *)
14968 xmlCanonicPath((const xmlChar *) filename);
14969 inputStream->buf = buf;
14970 xmlBufResetInput(buf->buffer, inputStream);
14971
14972 inputPush(ctxt, inputStream);
14973
14974 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14975 (ctxt->input->buf != NULL)) {
14976 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14977 size_t cur = ctxt->input->cur - ctxt->input->base;
14978
14979 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14980
14981 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14982 #ifdef DEBUG_PUSH
14983 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14984 #endif
14985 }
14986
14987 if (encoding != NULL) {
14988 xmlCharEncodingHandlerPtr hdlr;
14989
14990 if (ctxt->encoding != NULL)
14991 xmlFree((xmlChar *) ctxt->encoding);
14992 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14993
14994 hdlr = xmlFindCharEncodingHandler(encoding);
14995 if (hdlr != NULL) {
14996 xmlSwitchToEncoding(ctxt, hdlr);
14997 } else {
14998 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14999 "Unsupported encoding %s\n", BAD_CAST encoding);
15000 }
15001 } else if (enc != XML_CHAR_ENCODING_NONE) {
15002 xmlSwitchEncoding(ctxt, enc);
15003 }
15004
15005 return(0);
15006 }
15007
15008
15009 /**
15010 * xmlCtxtUseOptionsInternal:
15011 * @ctxt: an XML parser context
15012 * @options: a combination of xmlParserOption
15013 * @encoding: the user provided encoding to use
15014 *
15015 * Applies the options to the parser context
15016 *
15017 * Returns 0 in case of success, the set of unknown or unimplemented options
15018 * in case of error.
15019 */
15020 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)15021 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15022 {
15023 if (ctxt == NULL)
15024 return(-1);
15025 if (encoding != NULL) {
15026 if (ctxt->encoding != NULL)
15027 xmlFree((xmlChar *) ctxt->encoding);
15028 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15029 }
15030 if (options & XML_PARSE_RECOVER) {
15031 ctxt->recovery = 1;
15032 options -= XML_PARSE_RECOVER;
15033 ctxt->options |= XML_PARSE_RECOVER;
15034 } else
15035 ctxt->recovery = 0;
15036 if (options & XML_PARSE_DTDLOAD) {
15037 ctxt->loadsubset = XML_DETECT_IDS;
15038 options -= XML_PARSE_DTDLOAD;
15039 ctxt->options |= XML_PARSE_DTDLOAD;
15040 } else
15041 ctxt->loadsubset = 0;
15042 if (options & XML_PARSE_DTDATTR) {
15043 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15044 options -= XML_PARSE_DTDATTR;
15045 ctxt->options |= XML_PARSE_DTDATTR;
15046 }
15047 if (options & XML_PARSE_NOENT) {
15048 ctxt->replaceEntities = 1;
15049 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15050 options -= XML_PARSE_NOENT;
15051 ctxt->options |= XML_PARSE_NOENT;
15052 } else
15053 ctxt->replaceEntities = 0;
15054 if (options & XML_PARSE_PEDANTIC) {
15055 ctxt->pedantic = 1;
15056 options -= XML_PARSE_PEDANTIC;
15057 ctxt->options |= XML_PARSE_PEDANTIC;
15058 } else
15059 ctxt->pedantic = 0;
15060 if (options & XML_PARSE_NOBLANKS) {
15061 ctxt->keepBlanks = 0;
15062 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15063 options -= XML_PARSE_NOBLANKS;
15064 ctxt->options |= XML_PARSE_NOBLANKS;
15065 } else
15066 ctxt->keepBlanks = 1;
15067 if (options & XML_PARSE_DTDVALID) {
15068 ctxt->validate = 1;
15069 if (options & XML_PARSE_NOWARNING)
15070 ctxt->vctxt.warning = NULL;
15071 if (options & XML_PARSE_NOERROR)
15072 ctxt->vctxt.error = NULL;
15073 options -= XML_PARSE_DTDVALID;
15074 ctxt->options |= XML_PARSE_DTDVALID;
15075 } else
15076 ctxt->validate = 0;
15077 if (options & XML_PARSE_NOWARNING) {
15078 ctxt->sax->warning = NULL;
15079 options -= XML_PARSE_NOWARNING;
15080 }
15081 if (options & XML_PARSE_NOERROR) {
15082 ctxt->sax->error = NULL;
15083 ctxt->sax->fatalError = NULL;
15084 options -= XML_PARSE_NOERROR;
15085 }
15086 #ifdef LIBXML_SAX1_ENABLED
15087 if (options & XML_PARSE_SAX1) {
15088 ctxt->sax->startElement = xmlSAX2StartElement;
15089 ctxt->sax->endElement = xmlSAX2EndElement;
15090 ctxt->sax->startElementNs = NULL;
15091 ctxt->sax->endElementNs = NULL;
15092 ctxt->sax->initialized = 1;
15093 options -= XML_PARSE_SAX1;
15094 ctxt->options |= XML_PARSE_SAX1;
15095 }
15096 #endif /* LIBXML_SAX1_ENABLED */
15097 if (options & XML_PARSE_NODICT) {
15098 ctxt->dictNames = 0;
15099 options -= XML_PARSE_NODICT;
15100 ctxt->options |= XML_PARSE_NODICT;
15101 } else {
15102 ctxt->dictNames = 1;
15103 }
15104 if (options & XML_PARSE_NOCDATA) {
15105 ctxt->sax->cdataBlock = NULL;
15106 options -= XML_PARSE_NOCDATA;
15107 ctxt->options |= XML_PARSE_NOCDATA;
15108 }
15109 if (options & XML_PARSE_NSCLEAN) {
15110 ctxt->options |= XML_PARSE_NSCLEAN;
15111 options -= XML_PARSE_NSCLEAN;
15112 }
15113 if (options & XML_PARSE_NONET) {
15114 ctxt->options |= XML_PARSE_NONET;
15115 options -= XML_PARSE_NONET;
15116 }
15117 if (options & XML_PARSE_COMPACT) {
15118 ctxt->options |= XML_PARSE_COMPACT;
15119 options -= XML_PARSE_COMPACT;
15120 }
15121 if (options & XML_PARSE_OLD10) {
15122 ctxt->options |= XML_PARSE_OLD10;
15123 options -= XML_PARSE_OLD10;
15124 }
15125 if (options & XML_PARSE_NOBASEFIX) {
15126 ctxt->options |= XML_PARSE_NOBASEFIX;
15127 options -= XML_PARSE_NOBASEFIX;
15128 }
15129 if (options & XML_PARSE_HUGE) {
15130 ctxt->options |= XML_PARSE_HUGE;
15131 options -= XML_PARSE_HUGE;
15132 if (ctxt->dict != NULL)
15133 xmlDictSetLimit(ctxt->dict, 0);
15134 }
15135 if (options & XML_PARSE_OLDSAX) {
15136 ctxt->options |= XML_PARSE_OLDSAX;
15137 options -= XML_PARSE_OLDSAX;
15138 }
15139 if (options & XML_PARSE_IGNORE_ENC) {
15140 ctxt->options |= XML_PARSE_IGNORE_ENC;
15141 options -= XML_PARSE_IGNORE_ENC;
15142 }
15143 if (options & XML_PARSE_BIG_LINES) {
15144 ctxt->options |= XML_PARSE_BIG_LINES;
15145 options -= XML_PARSE_BIG_LINES;
15146 }
15147 ctxt->linenumbers = 1;
15148 return (options);
15149 }
15150
15151 /**
15152 * xmlCtxtUseOptions:
15153 * @ctxt: an XML parser context
15154 * @options: a combination of xmlParserOption
15155 *
15156 * Applies the options to the parser context
15157 *
15158 * Returns 0 in case of success, the set of unknown or unimplemented options
15159 * in case of error.
15160 */
15161 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)15162 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15163 {
15164 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15165 }
15166
15167 /**
15168 * xmlDoRead:
15169 * @ctxt: an XML parser context
15170 * @URL: the base URL to use for the document
15171 * @encoding: the document encoding, or NULL
15172 * @options: a combination of xmlParserOption
15173 * @reuse: keep the context for reuse
15174 *
15175 * Common front-end for the xmlRead functions
15176 *
15177 * Returns the resulting document tree or NULL
15178 */
15179 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)15180 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15181 int options, int reuse)
15182 {
15183 xmlDocPtr ret;
15184
15185 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15186 if (encoding != NULL) {
15187 xmlCharEncodingHandlerPtr hdlr;
15188
15189 hdlr = xmlFindCharEncodingHandler(encoding);
15190 if (hdlr != NULL)
15191 xmlSwitchToEncoding(ctxt, hdlr);
15192 }
15193 if ((URL != NULL) && (ctxt->input != NULL) &&
15194 (ctxt->input->filename == NULL))
15195 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15196 xmlParseDocument(ctxt);
15197 if ((ctxt->wellFormed) || ctxt->recovery)
15198 ret = ctxt->myDoc;
15199 else {
15200 ret = NULL;
15201 if (ctxt->myDoc != NULL) {
15202 xmlFreeDoc(ctxt->myDoc);
15203 }
15204 }
15205 ctxt->myDoc = NULL;
15206 if (!reuse) {
15207 xmlFreeParserCtxt(ctxt);
15208 }
15209
15210 return (ret);
15211 }
15212
15213 /**
15214 * xmlReadDoc:
15215 * @cur: a pointer to a zero terminated string
15216 * @URL: the base URL to use for the document
15217 * @encoding: the document encoding, or NULL
15218 * @options: a combination of xmlParserOption
15219 *
15220 * parse an XML in-memory document and build a tree.
15221 *
15222 * Returns the resulting document tree
15223 */
15224 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)15225 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15226 {
15227 xmlParserCtxtPtr ctxt;
15228
15229 if (cur == NULL)
15230 return (NULL);
15231 xmlInitParser();
15232
15233 ctxt = xmlCreateDocParserCtxt(cur);
15234 if (ctxt == NULL)
15235 return (NULL);
15236 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15237 }
15238
15239 /**
15240 * xmlReadFile:
15241 * @filename: a file or URL
15242 * @encoding: the document encoding, or NULL
15243 * @options: a combination of xmlParserOption
15244 *
15245 * parse an XML file from the filesystem or the network.
15246 *
15247 * Returns the resulting document tree
15248 */
15249 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)15250 xmlReadFile(const char *filename, const char *encoding, int options)
15251 {
15252 xmlParserCtxtPtr ctxt;
15253
15254 xmlInitParser();
15255 ctxt = xmlCreateURLParserCtxt(filename, options);
15256 if (ctxt == NULL)
15257 return (NULL);
15258 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15259 }
15260
15261 /**
15262 * xmlReadMemory:
15263 * @buffer: a pointer to a char array
15264 * @size: the size of the array
15265 * @URL: the base URL to use for the document
15266 * @encoding: the document encoding, or NULL
15267 * @options: a combination of xmlParserOption
15268 *
15269 * parse an XML in-memory document and build a tree.
15270 *
15271 * Returns the resulting document tree
15272 */
15273 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)15274 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15275 {
15276 xmlParserCtxtPtr ctxt;
15277
15278 xmlInitParser();
15279 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15280 if (ctxt == NULL)
15281 return (NULL);
15282 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15283 }
15284
15285 /**
15286 * xmlReadFd:
15287 * @fd: an open file descriptor
15288 * @URL: the base URL to use for the document
15289 * @encoding: the document encoding, or NULL
15290 * @options: a combination of xmlParserOption
15291 *
15292 * parse an XML from a file descriptor and build a tree.
15293 * NOTE that the file descriptor will not be closed when the
15294 * reader is closed or reset.
15295 *
15296 * Returns the resulting document tree
15297 */
15298 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)15299 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15300 {
15301 xmlParserCtxtPtr ctxt;
15302 xmlParserInputBufferPtr input;
15303 xmlParserInputPtr stream;
15304
15305 if (fd < 0)
15306 return (NULL);
15307 xmlInitParser();
15308
15309 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15310 if (input == NULL)
15311 return (NULL);
15312 input->closecallback = NULL;
15313 ctxt = xmlNewParserCtxt();
15314 if (ctxt == NULL) {
15315 xmlFreeParserInputBuffer(input);
15316 return (NULL);
15317 }
15318 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15319 if (stream == NULL) {
15320 xmlFreeParserInputBuffer(input);
15321 xmlFreeParserCtxt(ctxt);
15322 return (NULL);
15323 }
15324 inputPush(ctxt, stream);
15325 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15326 }
15327
15328 /**
15329 * xmlReadIO:
15330 * @ioread: an I/O read function
15331 * @ioclose: an I/O close function
15332 * @ioctx: an I/O handler
15333 * @URL: the base URL to use for the document
15334 * @encoding: the document encoding, or NULL
15335 * @options: a combination of xmlParserOption
15336 *
15337 * parse an XML document from I/O functions and source and build a tree.
15338 *
15339 * Returns the resulting document tree
15340 */
15341 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15342 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15343 void *ioctx, const char *URL, const char *encoding, int options)
15344 {
15345 xmlParserCtxtPtr ctxt;
15346 xmlParserInputBufferPtr input;
15347 xmlParserInputPtr stream;
15348
15349 if (ioread == NULL)
15350 return (NULL);
15351 xmlInitParser();
15352
15353 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15354 XML_CHAR_ENCODING_NONE);
15355 if (input == NULL) {
15356 if (ioclose != NULL)
15357 ioclose(ioctx);
15358 return (NULL);
15359 }
15360 ctxt = xmlNewParserCtxt();
15361 if (ctxt == NULL) {
15362 xmlFreeParserInputBuffer(input);
15363 return (NULL);
15364 }
15365 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15366 if (stream == NULL) {
15367 xmlFreeParserInputBuffer(input);
15368 xmlFreeParserCtxt(ctxt);
15369 return (NULL);
15370 }
15371 inputPush(ctxt, stream);
15372 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15373 }
15374
15375 /**
15376 * xmlCtxtReadDoc:
15377 * @ctxt: an XML parser context
15378 * @cur: a pointer to a zero terminated string
15379 * @URL: the base URL to use for the document
15380 * @encoding: the document encoding, or NULL
15381 * @options: a combination of xmlParserOption
15382 *
15383 * parse an XML in-memory document and build a tree.
15384 * This reuses the existing @ctxt parser context
15385 *
15386 * Returns the resulting document tree
15387 */
15388 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)15389 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15390 const char *URL, const char *encoding, int options)
15391 {
15392 xmlParserInputPtr stream;
15393
15394 if (cur == NULL)
15395 return (NULL);
15396 if (ctxt == NULL)
15397 return (NULL);
15398 xmlInitParser();
15399
15400 xmlCtxtReset(ctxt);
15401
15402 stream = xmlNewStringInputStream(ctxt, cur);
15403 if (stream == NULL) {
15404 return (NULL);
15405 }
15406 inputPush(ctxt, stream);
15407 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15408 }
15409
15410 /**
15411 * xmlCtxtReadFile:
15412 * @ctxt: an XML parser context
15413 * @filename: a file or URL
15414 * @encoding: the document encoding, or NULL
15415 * @options: a combination of xmlParserOption
15416 *
15417 * parse an XML file from the filesystem or the network.
15418 * This reuses the existing @ctxt parser context
15419 *
15420 * Returns the resulting document tree
15421 */
15422 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15423 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15424 const char *encoding, int options)
15425 {
15426 xmlParserInputPtr stream;
15427
15428 if (filename == NULL)
15429 return (NULL);
15430 if (ctxt == NULL)
15431 return (NULL);
15432 xmlInitParser();
15433
15434 xmlCtxtReset(ctxt);
15435
15436 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15437 if (stream == NULL) {
15438 return (NULL);
15439 }
15440 inputPush(ctxt, stream);
15441 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15442 }
15443
15444 /**
15445 * xmlCtxtReadMemory:
15446 * @ctxt: an XML parser context
15447 * @buffer: a pointer to a char array
15448 * @size: the size of the array
15449 * @URL: the base URL to use for the document
15450 * @encoding: the document encoding, or NULL
15451 * @options: a combination of xmlParserOption
15452 *
15453 * parse an XML in-memory document and build a tree.
15454 * This reuses the existing @ctxt parser context
15455 *
15456 * Returns the resulting document tree
15457 */
15458 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15459 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15460 const char *URL, const char *encoding, int options)
15461 {
15462 xmlParserInputBufferPtr input;
15463 xmlParserInputPtr stream;
15464
15465 if (ctxt == NULL)
15466 return (NULL);
15467 if (buffer == NULL)
15468 return (NULL);
15469 xmlInitParser();
15470
15471 xmlCtxtReset(ctxt);
15472
15473 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15474 if (input == NULL) {
15475 return(NULL);
15476 }
15477
15478 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15479 if (stream == NULL) {
15480 xmlFreeParserInputBuffer(input);
15481 return(NULL);
15482 }
15483
15484 inputPush(ctxt, stream);
15485 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15486 }
15487
15488 /**
15489 * xmlCtxtReadFd:
15490 * @ctxt: an XML parser context
15491 * @fd: an open file descriptor
15492 * @URL: the base URL to use for the document
15493 * @encoding: the document encoding, or NULL
15494 * @options: a combination of xmlParserOption
15495 *
15496 * parse an XML from a file descriptor and build a tree.
15497 * This reuses the existing @ctxt parser context
15498 * NOTE that the file descriptor will not be closed when the
15499 * reader is closed or reset.
15500 *
15501 * Returns the resulting document tree
15502 */
15503 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15504 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15505 const char *URL, const char *encoding, int options)
15506 {
15507 xmlParserInputBufferPtr input;
15508 xmlParserInputPtr stream;
15509
15510 if (fd < 0)
15511 return (NULL);
15512 if (ctxt == NULL)
15513 return (NULL);
15514 xmlInitParser();
15515
15516 xmlCtxtReset(ctxt);
15517
15518
15519 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15520 if (input == NULL)
15521 return (NULL);
15522 input->closecallback = NULL;
15523 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15524 if (stream == NULL) {
15525 xmlFreeParserInputBuffer(input);
15526 return (NULL);
15527 }
15528 inputPush(ctxt, stream);
15529 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15530 }
15531
15532 /**
15533 * xmlCtxtReadIO:
15534 * @ctxt: an XML parser context
15535 * @ioread: an I/O read function
15536 * @ioclose: an I/O close function
15537 * @ioctx: an I/O handler
15538 * @URL: the base URL to use for the document
15539 * @encoding: the document encoding, or NULL
15540 * @options: a combination of xmlParserOption
15541 *
15542 * parse an XML document from I/O functions and source and build a tree.
15543 * This reuses the existing @ctxt parser context
15544 *
15545 * Returns the resulting document tree
15546 */
15547 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15548 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15549 xmlInputCloseCallback ioclose, void *ioctx,
15550 const char *URL,
15551 const char *encoding, int options)
15552 {
15553 xmlParserInputBufferPtr input;
15554 xmlParserInputPtr stream;
15555
15556 if (ioread == NULL)
15557 return (NULL);
15558 if (ctxt == NULL)
15559 return (NULL);
15560 xmlInitParser();
15561
15562 xmlCtxtReset(ctxt);
15563
15564 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15565 XML_CHAR_ENCODING_NONE);
15566 if (input == NULL) {
15567 if (ioclose != NULL)
15568 ioclose(ioctx);
15569 return (NULL);
15570 }
15571 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15572 if (stream == NULL) {
15573 xmlFreeParserInputBuffer(input);
15574 return (NULL);
15575 }
15576 inputPush(ctxt, stream);
15577 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15578 }
15579
15580 #define bottom_parser
15581 #include "elfgcchack.h"
15582