1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different character ranges are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #ifdef LIBXML_CATALOG_ENABLED
66 #include <libxml/catalog.h>
67 #endif
68
69 #include "private/buf.h"
70 #include "private/dict.h"
71 #include "private/entities.h"
72 #include "private/error.h"
73 #include "private/html.h"
74 #include "private/io.h"
75 #include "private/parser.h"
76
77 #define NS_INDEX_EMPTY INT_MAX
78 #define NS_INDEX_XML (INT_MAX - 1)
79 #define URI_HASH_EMPTY 0xD943A04E
80 #define URI_HASH_XML 0xF0451F02
81
/*
 * Record describing a start tag.
 * NOTE(review): the code that fills and reads this struct is outside this
 * section; the per-field notes below are inferred from the field names only
 * and should be confirmed at the use sites.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the element's namespace prefix */
    const xmlChar *URI;     /* presumably the element's namespace URI */
    int line;               /* presumably the input line of the start tag */
    int nsNr;               /* presumably a count of namespace declarations */
};
88
/*
 * Additional per-namespace bookkeeping, referenced through the "extra"
 * member of struct _xmlParserNsData.
 * NOTE(review): field meanings are inferred from their names; the code
 * using them is not visible in this section.
 */
typedef struct {
    void *saxData;              /* opaque pointer; owner not visible here */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the namespace URI */
    unsigned elementId;         /* presumably the id of the declaring element */
    int oldIndex;               /* presumably the index of a shadowed binding */
} xmlParserNsExtra;
96
/*
 * One bucket of the hash table referenced by the "hash" member of
 * struct _xmlParserNsData.
 */
typedef struct {
    unsigned hashValue;     /* presumably the hash of the stored key */
    int index;              /* presumably an index into a namespace array */
} xmlParserNsBucket;
101
/*
 * Namespace bookkeeping of a parser context: an array of xmlParserNsExtra
 * records plus a hash table made of xmlParserNsBucket entries.
 * NOTE(review): the management code is outside this section; confirm the
 * per-field notes below against it.
 */
struct _xmlParserNsData {
    xmlParserNsExtra *extra;    /* array of extra per-namespace records */

    unsigned hashSize;          /* presumably the number of buckets in hash */
    unsigned hashElems;         /* presumably the number of used buckets */
    xmlParserNsBucket *hash;    /* the bucket array */

    unsigned elementId;         /* presumably a running element counter */
    int defaultNsIndex;         /* presumably the index of the default namespace */
};
112
/*
 * Bucket of an attribute hash table.
 * NOTE(review): the table itself is managed outside this section.
 */
struct _xmlAttrHashBucket {
    unsigned hashValue;     /* presumably the hash of the attribute name */
    int index;              /* presumably an index into an attribute array */
};
117
/*
 * Forward declarations of static functions that are defined elsewhere in
 * this translation unit.
 */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
        xmlParserCtxtPtr pctx);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
128
129 /************************************************************************
130 * *
131 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
132 * *
133 ************************************************************************/
134
/*
 * NOTE(review): the code using these two thresholds is outside this
 * section; confirm their exact meaning at the use sites.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * Constants for protection against abusive entity expansion
 * ("billion laughs").
 */

/*
 * A certain amount of entity expansion which is always allowed.
 * xmlParserEntityCheck only applies the amplification limit once
 * ctxt->sizeentcopy exceeds this value.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * Fixed cost for each entity reference. This crudely models processing time
 * as well to protect, for example, against exponential expansion of empty
 * or very short entities. It is added to ctxt->sizeentcopy on every call
 * of xmlParserEntityCheck.
 */
#define XML_ENT_FIXED_COST 20
154
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The variable has external linkage and is initialized to 256.
 */
unsigned int xmlParserMaxDepth = 256;
164
165
166
/*
 * NOTE(review): the users of the two buffer sizes and of SAX_COMPAT_MODE
 * are outside this section.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the available input characters are followed by a 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
181
/**
 * xmlParserVersion:
 *
 * Constant string describing the internal version of the library.
 * It is the compile-time concatenation of LIBXML_VERSION_STRING and
 * LIBXML_VERSION_EXTRA.
 */
const char *const
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
189
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
199
200
/*
 * More forward declarations of static functions defined elsewhere in this
 * translation unit.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
                          const char *encoding);
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
226
227 /************************************************************************
228 * *
229 * Some factorized error routines *
230 * *
231 ************************************************************************/
232
233 /**
234 * xmlErrAttributeDup:
235 * @ctxt: an XML parser context
236 * @prefix: the attribute prefix
237 * @localname: the attribute localname
238 *
239 * Handle a redefinition of attribute error
240 */
241 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)242 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
243 const xmlChar * localname)
244 {
245 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
246 (ctxt->instate == XML_PARSER_EOF))
247 return;
248 if (ctxt != NULL)
249 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
250
251 if (prefix == NULL)
252 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
253 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
254 (const char *) localname, NULL, NULL, 0, 0,
255 "Attribute %s redefined\n", localname);
256 else
257 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
258 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
259 (const char *) prefix, (const char *) localname,
260 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
261 localname);
262 if (ctxt != NULL) {
263 ctxt->wellFormed = 0;
264 if (ctxt->recovery == 0)
265 ctxt->disableSAX = 1;
266 }
267 }
268
269 /**
270 * xmlFatalErrMsg:
271 * @ctxt: an XML parser context
272 * @error: the error number
273 * @msg: the error message
274 *
275 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
276 */
277 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)278 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
279 const char *msg)
280 {
281 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
282 (ctxt->instate == XML_PARSER_EOF))
283 return;
284 if (ctxt != NULL)
285 ctxt->errNo = error;
286 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
287 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
288 if (ctxt != NULL) {
289 ctxt->wellFormed = 0;
290 if (ctxt->recovery == 0)
291 ctxt->disableSAX = 1;
292 }
293 }
294
295 /**
296 * xmlWarningMsg:
297 * @ctxt: an XML parser context
298 * @error: the error number
299 * @msg: the error message
300 * @str1: extra data
301 * @str2: extra data
302 *
303 * Handle a warning.
304 */
305 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)306 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
307 const char *msg, const xmlChar *str1, const xmlChar *str2)
308 {
309 xmlStructuredErrorFunc schannel = NULL;
310
311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
313 return;
314 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
315 (ctxt->sax->initialized == XML_SAX2_MAGIC))
316 schannel = ctxt->sax->serror;
317 if (ctxt != NULL) {
318 __xmlRaiseError(schannel,
319 (ctxt->sax) ? ctxt->sax->warning : NULL,
320 ctxt->userData,
321 ctxt, NULL, XML_FROM_PARSER, error,
322 XML_ERR_WARNING, NULL, 0,
323 (const char *) str1, (const char *) str2, NULL, 0, 0,
324 msg, (const char *) str1, (const char *) str2);
325 } else {
326 __xmlRaiseError(schannel, NULL, NULL,
327 ctxt, NULL, XML_FROM_PARSER, error,
328 XML_ERR_WARNING, NULL, 0,
329 (const char *) str1, (const char *) str2, NULL, 0, 0,
330 msg, (const char *) str1, (const char *) str2);
331 }
332 }
333
334 /**
335 * xmlValidityError:
336 * @ctxt: an XML parser context
337 * @error: the error number
338 * @msg: the error message
339 * @str1: extra data
340 *
341 * Handle a validity error.
342 */
343 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)344 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
345 const char *msg, const xmlChar *str1, const xmlChar *str2)
346 {
347 xmlStructuredErrorFunc schannel = NULL;
348
349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
351 return;
352 if (ctxt != NULL) {
353 ctxt->errNo = error;
354 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
355 schannel = ctxt->sax->serror;
356 }
357 if (ctxt != NULL) {
358 __xmlRaiseError(schannel,
359 ctxt->vctxt.error, ctxt->vctxt.userData,
360 ctxt, NULL, XML_FROM_DTD, error,
361 XML_ERR_ERROR, NULL, 0, (const char *) str1,
362 (const char *) str2, NULL, 0, 0,
363 msg, (const char *) str1, (const char *) str2);
364 ctxt->valid = 0;
365 } else {
366 __xmlRaiseError(schannel, NULL, NULL,
367 ctxt, NULL, XML_FROM_DTD, error,
368 XML_ERR_ERROR, NULL, 0, (const char *) str1,
369 (const char *) str2, NULL, 0, 0,
370 msg, (const char *) str1, (const char *) str2);
371 }
372 }
373
374 /**
375 * xmlFatalErrMsgInt:
376 * @ctxt: an XML parser context
377 * @error: the error number
378 * @msg: the error message
379 * @val: an integer value
380 *
381 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
382 */
383 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)384 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
385 const char *msg, int val)
386 {
387 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
388 (ctxt->instate == XML_PARSER_EOF))
389 return;
390 if (ctxt != NULL)
391 ctxt->errNo = error;
392 __xmlRaiseError(NULL, NULL, NULL,
393 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
394 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
395 if (ctxt != NULL) {
396 ctxt->wellFormed = 0;
397 if (ctxt->recovery == 0)
398 ctxt->disableSAX = 1;
399 }
400 }
401
402 /**
403 * xmlFatalErrMsgStrIntStr:
404 * @ctxt: an XML parser context
405 * @error: the error number
406 * @msg: the error message
407 * @str1: an string info
408 * @val: an integer value
409 * @str2: an string info
410 *
411 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
412 */
413 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)414 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, int val,
416 const xmlChar *str2)
417 {
418 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
419 (ctxt->instate == XML_PARSER_EOF))
420 return;
421 if (ctxt != NULL)
422 ctxt->errNo = error;
423 __xmlRaiseError(NULL, NULL, NULL,
424 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
425 NULL, 0, (const char *) str1, (const char *) str2,
426 NULL, val, 0, msg, str1, val, str2);
427 if (ctxt != NULL) {
428 ctxt->wellFormed = 0;
429 if (ctxt->recovery == 0)
430 ctxt->disableSAX = 1;
431 }
432 }
433
434 /**
435 * xmlFatalErrMsgStr:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @val: a string value
440 *
441 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
442 */
443 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)444 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar * val)
446 {
447 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
448 (ctxt->instate == XML_PARSER_EOF))
449 return;
450 if (ctxt != NULL)
451 ctxt->errNo = error;
452 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
453 XML_FROM_PARSER, error, XML_ERR_FATAL,
454 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
455 val);
456 if (ctxt != NULL) {
457 ctxt->wellFormed = 0;
458 if (ctxt->recovery == 0)
459 ctxt->disableSAX = 1;
460 }
461 }
462
463 /**
464 * xmlErrMsgStr:
465 * @ctxt: an XML parser context
466 * @error: the error number
467 * @msg: the error message
468 * @val: a string value
469 *
470 * Handle a non fatal parser error
471 */
472 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)473 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474 const char *msg, const xmlChar * val)
475 {
476 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477 (ctxt->instate == XML_PARSER_EOF))
478 return;
479 if (ctxt != NULL)
480 ctxt->errNo = error;
481 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
482 XML_FROM_PARSER, error, XML_ERR_ERROR,
483 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
484 val);
485 }
486
487 /**
488 * xmlNsErr:
489 * @ctxt: an XML parser context
490 * @error: the error number
491 * @msg: the message
492 * @info1: extra information string
493 * @info2: extra information string
494 *
495 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
496 */
497 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)498 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
499 const char *msg,
500 const xmlChar * info1, const xmlChar * info2,
501 const xmlChar * info3)
502 {
503 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
504 (ctxt->instate == XML_PARSER_EOF))
505 return;
506 if (ctxt != NULL)
507 ctxt->errNo = error;
508 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
509 XML_ERR_ERROR, NULL, 0, (const char *) info1,
510 (const char *) info2, (const char *) info3, 0, 0, msg,
511 info1, info2, info3);
512 if (ctxt != NULL)
513 ctxt->nsWellFormed = 0;
514 }
515
516 /**
517 * xmlNsWarn
518 * @ctxt: an XML parser context
519 * @error: the error number
520 * @msg: the message
521 * @info1: extra information string
522 * @info2: extra information string
523 *
524 * Handle a namespace warning error
525 */
526 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)527 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
528 const char *msg,
529 const xmlChar * info1, const xmlChar * info2,
530 const xmlChar * info3)
531 {
532 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
533 (ctxt->instate == XML_PARSER_EOF))
534 return;
535 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
536 XML_ERR_WARNING, NULL, 0, (const char *) info1,
537 (const char *) info2, (const char *) info3, 0, 0, msg,
538 info1, info2, info3);
539 }
540
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest increment that still fits into *dst. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
548
/*
 * Same saturating addition as xmlSaturatedAdd: add @val to *@dst and
 * clamp the result to ULONG_MAX. Despite its name, the increment is
 * received as an unsigned long.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
    if (val <= ULONG_MAX - *dst) {
        *dst += val;
        return;
    }
    /* The sum would not fit: saturate. */
    *dst = ULONG_MAX;
}
556
557 /**
558 * xmlParserEntityCheck:
559 * @ctxt: parser context
560 * @extra: sum of unexpanded entity sizes
561 *
562 * Check for non-linear entity expansion behaviour.
563 *
564 * In some cases like xmlStringDecodeEntities, this function is called
565 * for each, possibly nested entity and its unexpanded content length.
566 *
567 * In other cases like xmlParseReference, it's only called for each
568 * top-level entity with its unexpanded content length plus the sum of
569 * the unexpanded content lengths (plus fixed cost) of all nested
570 * entities.
571 *
572 * Summing the unexpanded lengths also adds the length of the reference.
573 * This is by design. Taking the length of the entity name into account
574 * discourages attacks that try to waste CPU time with abusively long
575 * entity names. See test/recurse/lol6.xml for example. Each call also
576 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
577 * short entities.
578 *
579 * Returns 1 on error, 0 on success.
580 */
581 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)582 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
583 {
584 unsigned long consumed;
585 xmlParserInputPtr input = ctxt->input;
586 xmlEntityPtr entity = input->entity;
587
588 /*
589 * Compute total consumed bytes so far, including input streams of
590 * external entities.
591 */
592 consumed = input->parentConsumed;
593 if ((entity == NULL) ||
594 ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
595 ((entity->flags & XML_ENT_PARSED) == 0))) {
596 xmlSaturatedAdd(&consumed, input->consumed);
597 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
598 }
599 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
600
601 /*
602 * Add extra cost and some fixed cost.
603 */
604 xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
605 xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
606
607 /*
608 * It's important to always use saturation arithmetic when tracking
609 * entity sizes to make the size checks reliable. If "sizeentcopy"
610 * overflows, we have to abort.
611 */
612 if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
613 ((ctxt->sizeentcopy >= ULONG_MAX) ||
614 (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
615 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
616 "Maximum entity amplification factor exceeded, see "
617 "xmlCtxtSetMaxAmplification.\n");
618 xmlHaltParser(ctxt);
619 return(1);
620 }
621
622 return(0);
623 }
624
625 /************************************************************************
626 * *
627 * Library wide options *
628 * *
629 ************************************************************************/
630
631 /**
632 * xmlHasFeature:
633 * @feature: the feature to be examined
634 *
635 * Examines if the library has been compiled with a given feature.
636 *
637 * Returns a non-zero value if the feature exist, otherwise zero.
638 * Returns zero (0) if the feature does not exist or an unknown
639 * unknown feature is requested, non-zero otherwise.
640 */
641 int
xmlHasFeature(xmlFeature feature)642 xmlHasFeature(xmlFeature feature)
643 {
644 switch (feature) {
645 case XML_WITH_THREAD:
646 #ifdef LIBXML_THREAD_ENABLED
647 return(1);
648 #else
649 return(0);
650 #endif
651 case XML_WITH_TREE:
652 #ifdef LIBXML_TREE_ENABLED
653 return(1);
654 #else
655 return(0);
656 #endif
657 case XML_WITH_OUTPUT:
658 #ifdef LIBXML_OUTPUT_ENABLED
659 return(1);
660 #else
661 return(0);
662 #endif
663 case XML_WITH_PUSH:
664 #ifdef LIBXML_PUSH_ENABLED
665 return(1);
666 #else
667 return(0);
668 #endif
669 case XML_WITH_READER:
670 #ifdef LIBXML_READER_ENABLED
671 return(1);
672 #else
673 return(0);
674 #endif
675 case XML_WITH_PATTERN:
676 #ifdef LIBXML_PATTERN_ENABLED
677 return(1);
678 #else
679 return(0);
680 #endif
681 case XML_WITH_WRITER:
682 #ifdef LIBXML_WRITER_ENABLED
683 return(1);
684 #else
685 return(0);
686 #endif
687 case XML_WITH_SAX1:
688 #ifdef LIBXML_SAX1_ENABLED
689 return(1);
690 #else
691 return(0);
692 #endif
693 case XML_WITH_FTP:
694 #ifdef LIBXML_FTP_ENABLED
695 return(1);
696 #else
697 return(0);
698 #endif
699 case XML_WITH_HTTP:
700 #ifdef LIBXML_HTTP_ENABLED
701 return(1);
702 #else
703 return(0);
704 #endif
705 case XML_WITH_VALID:
706 #ifdef LIBXML_VALID_ENABLED
707 return(1);
708 #else
709 return(0);
710 #endif
711 case XML_WITH_HTML:
712 #ifdef LIBXML_HTML_ENABLED
713 return(1);
714 #else
715 return(0);
716 #endif
717 case XML_WITH_LEGACY:
718 #ifdef LIBXML_LEGACY_ENABLED
719 return(1);
720 #else
721 return(0);
722 #endif
723 case XML_WITH_C14N:
724 #ifdef LIBXML_C14N_ENABLED
725 return(1);
726 #else
727 return(0);
728 #endif
729 case XML_WITH_CATALOG:
730 #ifdef LIBXML_CATALOG_ENABLED
731 return(1);
732 #else
733 return(0);
734 #endif
735 case XML_WITH_XPATH:
736 #ifdef LIBXML_XPATH_ENABLED
737 return(1);
738 #else
739 return(0);
740 #endif
741 case XML_WITH_XPTR:
742 #ifdef LIBXML_XPTR_ENABLED
743 return(1);
744 #else
745 return(0);
746 #endif
747 case XML_WITH_XINCLUDE:
748 #ifdef LIBXML_XINCLUDE_ENABLED
749 return(1);
750 #else
751 return(0);
752 #endif
753 case XML_WITH_ICONV:
754 #ifdef LIBXML_ICONV_ENABLED
755 return(1);
756 #else
757 return(0);
758 #endif
759 case XML_WITH_ISO8859X:
760 #ifdef LIBXML_ISO8859X_ENABLED
761 return(1);
762 #else
763 return(0);
764 #endif
765 case XML_WITH_UNICODE:
766 #ifdef LIBXML_UNICODE_ENABLED
767 return(1);
768 #else
769 return(0);
770 #endif
771 case XML_WITH_REGEXP:
772 #ifdef LIBXML_REGEXP_ENABLED
773 return(1);
774 #else
775 return(0);
776 #endif
777 case XML_WITH_AUTOMATA:
778 #ifdef LIBXML_AUTOMATA_ENABLED
779 return(1);
780 #else
781 return(0);
782 #endif
783 case XML_WITH_EXPR:
784 #ifdef LIBXML_EXPR_ENABLED
785 return(1);
786 #else
787 return(0);
788 #endif
789 case XML_WITH_SCHEMAS:
790 #ifdef LIBXML_SCHEMAS_ENABLED
791 return(1);
792 #else
793 return(0);
794 #endif
795 case XML_WITH_SCHEMATRON:
796 #ifdef LIBXML_SCHEMATRON_ENABLED
797 return(1);
798 #else
799 return(0);
800 #endif
801 case XML_WITH_MODULES:
802 #ifdef LIBXML_MODULES_ENABLED
803 return(1);
804 #else
805 return(0);
806 #endif
807 case XML_WITH_DEBUG:
808 #ifdef LIBXML_DEBUG_ENABLED
809 return(1);
810 #else
811 return(0);
812 #endif
813 case XML_WITH_DEBUG_MEM:
814 #ifdef DEBUG_MEMORY_LOCATION
815 return(1);
816 #else
817 return(0);
818 #endif
819 case XML_WITH_DEBUG_RUN:
820 return(0);
821 case XML_WITH_ZLIB:
822 #ifdef LIBXML_ZLIB_ENABLED
823 return(1);
824 #else
825 return(0);
826 #endif
827 case XML_WITH_LZMA:
828 #ifdef LIBXML_LZMA_ENABLED
829 return(1);
830 #else
831 return(0);
832 #endif
833 case XML_WITH_ICU:
834 #ifdef LIBXML_ICU_ENABLED
835 return(1);
836 #else
837 return(0);
838 #endif
839 default:
840 break;
841 }
842 return(0);
843 }
844
845 /************************************************************************
846 * *
847 * SAX2 defaulted attributes handling *
848 * *
849 ************************************************************************/
850
851 /**
852 * xmlDetectSAX2:
853 * @ctxt: an XML parser context
854 *
855 * Do the SAX2 detection and specific initialization
856 */
857 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)858 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
859 xmlSAXHandlerPtr sax;
860
861 /* Avoid unused variable warning if features are disabled. */
862 (void) sax;
863
864 if (ctxt == NULL) return;
865 sax = ctxt->sax;
866 #ifdef LIBXML_SAX1_ENABLED
867 if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
868 ctxt->sax2 = 1;
869 #else
870 ctxt->sax2 = 1;
871 #endif /* LIBXML_SAX1_ENABLED */
872
873 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
874 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
875 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
876 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
877 (ctxt->str_xml_ns == NULL)) {
878 xmlErrMemory(ctxt, NULL);
879 }
880 }
881
/*
 * One defaulted attribute, as filled in by xmlAddDefAttrs.
 */
typedef struct {
    xmlHashedString prefix;     /* attribute prefix; .name is NULL if none */
    xmlHashedString name;       /* attribute local name */
    xmlHashedString value;      /* default value, interned in the dict */
    const xmlChar *valueEnd;    /* value.name plus the length of the value */
    int external;               /* copy of ctxt->external at registration */
    int expandedSize;           /* strlen of name + prefix (if any) + value */
} xmlDefAttr;

typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
/*
 * Growable array of the defaulted attributes of one element. It is
 * allocated as a single block holding the header and the attrs array.
 */
struct _xmlDefAttrs {
    int nbAttrs;       /* number of defaulted attributes on that element */
    int maxAttrs;      /* the size of the array */
#if __STDC_VERSION__ >= 199901L
    /* Using a C99 flexible array member avoids UBSan errors. */
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
#else
    /* Pre-C99 fallback: one-element array used as a variable-size tail. */
    xmlDefAttr attrs[1];
#endif
};
903
904 /**
905 * xmlAttrNormalizeSpace:
906 * @src: the source string
907 * @dst: the target string
908 *
909 * Normalize the space in non CDATA attribute values:
910 * If the attribute type is not CDATA, then the XML processor MUST further
911 * process the normalized attribute value by discarding any leading and
912 * trailing space (#x20) characters, and by replacing sequences of space
913 * (#x20) characters by a single space (#x20) character.
914 * Note that the size of dst need to be at least src, and if one doesn't need
915 * to preserve dst (and it doesn't come from a dictionary or read-only) then
916 * passing src as dst is just fine.
917 *
918 * Returns a pointer to the normalized value (dst) or NULL if no conversion
919 * is needed.
920 */
921 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)922 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
923 {
924 if ((src == NULL) || (dst == NULL))
925 return(NULL);
926
927 while (*src == 0x20) src++;
928 while (*src != 0) {
929 if (*src == 0x20) {
930 while (*src == 0x20) src++;
931 if (*src != 0)
932 *dst++ = 0x20;
933 } else {
934 *dst++ = *src++;
935 }
936 }
937 *dst = 0;
938 if (dst == src)
939 return(NULL);
940 return(dst);
941 }
942
943 /**
944 * xmlAttrNormalizeSpace2:
945 * @src: the source string
946 *
947 * Normalize the space in non CDATA attribute values, a slightly more complex
948 * front end to avoid allocation problems when running on attribute values
949 * coming from the input.
950 *
951 * Returns a pointer to the normalized value (dst) or NULL if no conversion
952 * is needed.
953 */
954 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)955 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
956 {
957 int i;
958 int remove_head = 0;
959 int need_realloc = 0;
960 const xmlChar *cur;
961
962 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
963 return(NULL);
964 i = *len;
965 if (i <= 0)
966 return(NULL);
967
968 cur = src;
969 while (*cur == 0x20) {
970 cur++;
971 remove_head++;
972 }
973 while (*cur != 0) {
974 if (*cur == 0x20) {
975 cur++;
976 if ((*cur == 0x20) || (*cur == 0)) {
977 need_realloc = 1;
978 break;
979 }
980 } else
981 cur++;
982 }
983 if (need_realloc) {
984 xmlChar *ret;
985
986 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
987 if (ret == NULL) {
988 xmlErrMemory(ctxt, NULL);
989 return(NULL);
990 }
991 xmlAttrNormalizeSpace(ret, ret);
992 *len = strlen((const char *)ret);
993 return(ret);
994 } else if (remove_head) {
995 *len -= remove_head;
996 memmove(src, src + remove_head, 1 + *len);
997 return(src);
998 }
999 return(NULL);
1000 }
1001
1002 /**
1003 * xmlAddDefAttrs:
1004 * @ctxt: an XML parser context
1005 * @fullname: the element fullname
1006 * @fullattr: the attribute fullname
1007 * @value: the attribute value
1008 *
1009 * Add a defaulted attribute for an element
1010 */
1011 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1012 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1013 const xmlChar *fullname,
1014 const xmlChar *fullattr,
1015 const xmlChar *value) {
1016 xmlDefAttrsPtr defaults;
1017 xmlDefAttr *attr;
1018 int len, expandedSize;
1019 xmlHashedString name;
1020 xmlHashedString prefix;
1021 xmlHashedString hvalue;
1022 const xmlChar *localname;
1023
1024 /*
1025 * Allows to detect attribute redefinitions
1026 */
1027 if (ctxt->attsSpecial != NULL) {
1028 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1029 return;
1030 }
1031
1032 if (ctxt->attsDefault == NULL) {
1033 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1034 if (ctxt->attsDefault == NULL)
1035 goto mem_error;
1036 }
1037
1038 /*
1039 * split the element name into prefix:localname , the string found
1040 * are within the DTD and then not associated to namespace names.
1041 */
1042 localname = xmlSplitQName3(fullname, &len);
1043 if (localname == NULL) {
1044 name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1045 prefix.name = NULL;
1046 } else {
1047 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1048 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1049 if (prefix.name == NULL)
1050 goto mem_error;
1051 }
1052 if (name.name == NULL)
1053 goto mem_error;
1054
1055 /*
1056 * make sure there is some storage
1057 */
1058 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1059 if ((defaults == NULL) ||
1060 (defaults->nbAttrs >= defaults->maxAttrs)) {
1061 xmlDefAttrsPtr temp;
1062 int newSize;
1063
1064 newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1065 temp = xmlRealloc(defaults,
1066 sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1067 if (temp == NULL)
1068 goto mem_error;
1069 if (defaults == NULL)
1070 temp->nbAttrs = 0;
1071 temp->maxAttrs = newSize;
1072 defaults = temp;
1073 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1074 defaults, NULL) < 0) {
1075 xmlFree(defaults);
1076 goto mem_error;
1077 }
1078 }
1079
1080 /*
1081 * Split the attribute name into prefix:localname , the string found
1082 * are within the DTD and hen not associated to namespace names.
1083 */
1084 localname = xmlSplitQName3(fullattr, &len);
1085 if (localname == NULL) {
1086 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1087 prefix.name = NULL;
1088 } else {
1089 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1090 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1091 if (prefix.name == NULL)
1092 goto mem_error;
1093 }
1094 if (name.name == NULL)
1095 goto mem_error;
1096
1097 /* intern the string and precompute the end */
1098 len = strlen((const char *) value);
1099 hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1100 if (hvalue.name == NULL)
1101 goto mem_error;
1102
1103 expandedSize = strlen((const char *) name.name);
1104 if (prefix.name != NULL)
1105 expandedSize += strlen((const char *) prefix.name);
1106 expandedSize += len;
1107
1108 attr = &defaults->attrs[defaults->nbAttrs++];
1109 attr->name = name;
1110 attr->prefix = prefix;
1111 attr->value = hvalue;
1112 attr->valueEnd = hvalue.name + len;
1113 attr->external = ctxt->external;
1114 attr->expandedSize = expandedSize;
1115
1116 return;
1117
1118 mem_error:
1119 xmlErrMemory(ctxt, NULL);
1120 return;
1121 }
1122
1123 /**
1124 * xmlAddSpecialAttr:
1125 * @ctxt: an XML parser context
1126 * @fullname: the element fullname
1127 * @fullattr: the attribute fullname
1128 * @type: the attribute type
1129 *
1130 * Register this attribute type
1131 */
1132 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1133 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1134 const xmlChar *fullname,
1135 const xmlChar *fullattr,
1136 int type)
1137 {
1138 if (ctxt->attsSpecial == NULL) {
1139 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1140 if (ctxt->attsSpecial == NULL)
1141 goto mem_error;
1142 }
1143
1144 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1145 return;
1146
1147 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1148 (void *) (ptrdiff_t) type);
1149 return;
1150
1151 mem_error:
1152 xmlErrMemory(ctxt, NULL);
1153 return;
1154 }
1155
1156 /**
1157 * xmlCleanSpecialAttrCallback:
1158 *
1159 * Removes CDATA attributes from the special attribute table
1160 */
1161 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1162 xmlCleanSpecialAttrCallback(void *payload, void *data,
1163 const xmlChar *fullname, const xmlChar *fullattr,
1164 const xmlChar *unused ATTRIBUTE_UNUSED) {
1165 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1166
1167 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1168 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1169 }
1170 }
1171
1172 /**
1173 * xmlCleanSpecialAttr:
1174 * @ctxt: an XML parser context
1175 *
1176 * Trim the list of attributes defined to remove all those of type
1177 * CDATA as they are not special. This call should be done when finishing
1178 * to parse the DTD and before starting to parse the document root.
1179 */
1180 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1181 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1182 {
1183 if (ctxt->attsSpecial == NULL)
1184 return;
1185
1186 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1187
1188 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1189 xmlHashFree(ctxt->attsSpecial, NULL);
1190 ctxt->attsSpecial = NULL;
1191 }
1192 return;
1193 }
1194
/*
 * xmlLangSkipAlpha:
 * @str:  position inside a NUL-terminated string
 *
 * Returns a pointer to the first byte at or after @str which is not an
 * ASCII letter.
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *str) {
    while (((str[0] >= 'A') && (str[0] <= 'Z')) ||
           ((str[0] >= 'a') && (str[0] <= 'z')))
        str++;
    return(str);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * The irregular i-xxx IANA codes and x-xxx user codes of the earlier
 * specification are still accepted. Only a subset of the RFC grammar is
 * checked: a single extlang, letters-only variants of 5 to 8 characters,
 * and whatever follows the first variant is accepted without inspection
 * (extensions and private use subtags are not parsed).
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *start, *end;
    ptrdiff_t len;
    /*
     * What has been consumed so far:
     *   0 = language, 1 = extlang, 2 = script, 3 = region
     */
    int stage;

    if (lang == NULL)
        return(0);

    /*
     * Legacy IANA ("i-") and user ("x-") codes from the previous version
     * of the XML-1.0 specification: deprecated but still accepted.
     */
    if (((lang[0] == 'i') || (lang[0] == 'I') ||
         (lang[0] == 'x') || (lang[0] == 'X')) && (lang[1] == '-')) {
        end = xmlLangSkipAlpha(lang + 2);
        return(end[0] == 0);
    }

    /* Primary language subtag */
    end = xmlLangSkipAlpha(lang);
    len = end - lang;
    if (len >= 4) {
        /* reserved or registered language: must stand alone */
        return((len <= 8) && (end[0] == 0));
    }
    if (len < 2)
        return(0);

    /* we got an ISO 639 code, walk the following subtags */
    for (stage = 0; stage <= 3; stage++) {
        /* @end sits right behind a complete subtag */
        if (end[0] == 0)
            return(1);
        if (end[0] != '-')
            return(0);
        start = end + 1;

        /* A leading digit can only start a UN M.49 region (3 digits) */
        if ((stage < 3) && (start[0] >= '0') && (start[0] <= '9')) {
            if ((start[1] < '0') || (start[1] > '9') ||
                (start[2] < '0') || (start[2] > '9'))
                return(0);
            end = start + 3;
            stage = 2;          /* the loop increment makes it "region" */
            continue;
        }

        end = xmlLangSkipAlpha(start);
        len = end - start;

        if ((len >= 5) && (len <= 8))
            break;              /* variant */
        if ((len == 3) && (stage == 0))
            continue;           /* extlang */
        if ((len == 4) && (stage <= 1)) {
            stage = 1;          /* script */
            continue;
        }
        if ((len == 2) && (stage <= 2)) {
            stage = 2;          /* region */
            continue;
        }
        return(0);
    }

    /*
     * we parsed a variant: either the tag ends here or more subtags
     * follow, which are not checked
     */
    return((end[0] == 0) || (end[0] == '-'));
}
1390
1391 /************************************************************************
1392 * *
1393 * Parser stacks related functions and macros *
1394 * *
1395 ************************************************************************/
1396
1397 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1398 const xmlChar ** str);
1399
1400 /**
1401 * xmlParserNsCreate:
1402 *
1403 * Create a new namespace database.
1404 *
1405 * Returns the new obejct.
1406 */
1407 xmlParserNsData *
xmlParserNsCreate(void)1408 xmlParserNsCreate(void) {
1409 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1410
1411 if (nsdb == NULL)
1412 return(NULL);
1413 memset(nsdb, 0, sizeof(*nsdb));
1414 nsdb->defaultNsIndex = INT_MAX;
1415
1416 return(nsdb);
1417 }
1418
1419 /**
1420 * xmlParserNsFree:
1421 * @nsdb: namespace database
1422 *
1423 * Free a namespace database.
1424 */
1425 void
xmlParserNsFree(xmlParserNsData * nsdb)1426 xmlParserNsFree(xmlParserNsData *nsdb) {
1427 if (nsdb == NULL)
1428 return;
1429
1430 xmlFree(nsdb->extra);
1431 xmlFree(nsdb->hash);
1432 xmlFree(nsdb);
1433 }
1434
1435 /**
1436 * xmlParserNsReset:
1437 * @nsdb: namespace database
1438 *
1439 * Reset a namespace database.
1440 */
1441 static void
xmlParserNsReset(xmlParserNsData * nsdb)1442 xmlParserNsReset(xmlParserNsData *nsdb) {
1443 if (nsdb == NULL)
1444 return;
1445
1446 nsdb->hashElems = 0;
1447 nsdb->elementId = 0;
1448 nsdb->defaultNsIndex = INT_MAX;
1449
1450 if (nsdb->hash)
1451 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1452 }
1453
1454 /**
1455 * xmlParserStartElement:
1456 * @nsdb: namespace database
1457 *
1458 * Signal that a new element has started.
1459 *
1460 * Returns 0 on success, -1 if the element counter overflowed.
1461 */
1462 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1463 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1464 if (nsdb->elementId == UINT_MAX)
1465 return(-1);
1466 nsdb->elementId++;
1467
1468 return(0);
1469 }
1470
1471 /**
1472 * xmlParserNsLookup:
1473 * @ctxt: parser context
1474 * @prefix: namespace prefix
1475 * @bucketPtr: optional bucket (return value)
1476 *
1477 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1478 * be set to the matching bucket, or the first empty bucket if no match
1479 * was found.
1480 *
1481 * Returns the namespace index on success, INT_MAX if no namespace was
1482 * found.
1483 */
1484 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1485 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1486 xmlParserNsBucket **bucketPtr) {
1487 xmlParserNsBucket *bucket;
1488 unsigned index, hashValue;
1489
1490 if (prefix->name == NULL)
1491 return(ctxt->nsdb->defaultNsIndex);
1492
1493 if (ctxt->nsdb->hashSize == 0)
1494 return(INT_MAX);
1495
1496 hashValue = prefix->hashValue;
1497 index = hashValue & (ctxt->nsdb->hashSize - 1);
1498 bucket = &ctxt->nsdb->hash[index];
1499
1500 while (bucket->hashValue) {
1501 if ((bucket->hashValue == hashValue) &&
1502 (bucket->index != INT_MAX)) {
1503 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1504 if (bucketPtr != NULL)
1505 *bucketPtr = bucket;
1506 return(bucket->index);
1507 }
1508 }
1509
1510 index++;
1511 bucket++;
1512 if (index == ctxt->nsdb->hashSize) {
1513 index = 0;
1514 bucket = ctxt->nsdb->hash;
1515 }
1516 }
1517
1518 if (bucketPtr != NULL)
1519 *bucketPtr = bucket;
1520 return(INT_MAX);
1521 }
1522
1523 /**
1524 * xmlParserNsLookupUri:
1525 * @ctxt: parser context
1526 * @prefix: namespace prefix
1527 *
1528 * Lookup namespace URI with given prefix.
1529 *
1530 * Returns the namespace URI on success, NULL if no namespace was found.
1531 */
1532 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1533 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1534 const xmlChar *ret;
1535 int nsIndex;
1536
1537 if (prefix->name == ctxt->str_xml)
1538 return(ctxt->str_xml_ns);
1539
1540 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1541 if (nsIndex == INT_MAX)
1542 return(NULL);
1543
1544 ret = ctxt->nsTab[nsIndex * 2 + 1];
1545 if (ret[0] == 0)
1546 ret = NULL;
1547 return(ret);
1548 }
1549
1550 /**
1551 * xmlParserNsLookupSax:
1552 * @ctxt: parser context
1553 * @prefix: namespace prefix
1554 *
1555 * Lookup extra data for the given prefix. This returns data stored
1556 * with xmlParserNsUdpateSax.
1557 *
1558 * Returns the data on success, NULL if no namespace was found.
1559 */
1560 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1561 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1562 xmlHashedString hprefix;
1563 int nsIndex;
1564
1565 if (prefix == ctxt->str_xml)
1566 return(NULL);
1567
1568 hprefix.name = prefix;
1569 if (prefix != NULL)
1570 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1571 else
1572 hprefix.hashValue = 0;
1573 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1574 if (nsIndex == INT_MAX)
1575 return(NULL);
1576
1577 return(ctxt->nsdb->extra[nsIndex].saxData);
1578 }
1579
1580 /**
1581 * xmlParserNsUpdateSax:
1582 * @ctxt: parser context
1583 * @prefix: namespace prefix
1584 * @saxData: extra data for SAX handler
1585 *
1586 * Sets or updates extra data for the given prefix. This value will be
1587 * returned by xmlParserNsLookupSax as long as the namespace with the
1588 * given prefix is in scope.
1589 *
1590 * Returns the data on success, NULL if no namespace was found.
1591 */
1592 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1593 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1594 void *saxData) {
1595 xmlHashedString hprefix;
1596 int nsIndex;
1597
1598 if (prefix == ctxt->str_xml)
1599 return(-1);
1600
1601 hprefix.name = prefix;
1602 if (prefix != NULL)
1603 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1604 else
1605 hprefix.hashValue = 0;
1606 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1607 if (nsIndex == INT_MAX)
1608 return(-1);
1609
1610 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1611 return(0);
1612 }
1613
1614 /**
1615 * xmlParserNsGrow:
1616 * @ctxt: parser context
1617 *
1618 * Grows the namespace tables.
1619 *
1620 * Returns 0 on success, -1 if a memory allocation failed.
1621 */
1622 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1623 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1624 const xmlChar **table;
1625 xmlParserNsExtra *extra;
1626 int newSize;
1627
1628 if (ctxt->nsMax > INT_MAX / 2)
1629 goto error;
1630 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1631
1632 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1633 if (table == NULL)
1634 goto error;
1635 ctxt->nsTab = table;
1636
1637 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1638 if (extra == NULL)
1639 goto error;
1640 ctxt->nsdb->extra = extra;
1641
1642 ctxt->nsMax = newSize;
1643 return(0);
1644
1645 error:
1646 xmlErrMemory(ctxt, NULL);
1647 return(-1);
1648 }
1649
1650 /**
1651 * xmlParserNsPush:
1652 * @ctxt: parser context
1653 * @prefix: prefix with hash value
1654 * @uri: uri with hash value
1655 * @saxData: extra data for SAX handler
1656 * @defAttr: whether the namespace comes from a default attribute
1657 *
1658 * Push a new namespace on the table.
1659 *
1660 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1661 * -1 if a memory allocation failed.
1662 */
1663 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1664 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1665 const xmlHashedString *uri, void *saxData, int defAttr) {
1666 xmlParserNsBucket *bucket = NULL;
1667 xmlParserNsExtra *extra;
1668 const xmlChar **ns;
1669 unsigned hashValue, nsIndex, oldIndex;
1670
1671 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1672 return(0);
1673
1674 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1675 xmlErrMemory(ctxt, NULL);
1676 return(-1);
1677 }
1678
1679 /*
1680 * Default namespace and 'xml' namespace
1681 */
1682 if ((prefix == NULL) || (prefix->name == NULL)) {
1683 oldIndex = ctxt->nsdb->defaultNsIndex;
1684
1685 if (oldIndex != INT_MAX) {
1686 if (defAttr != 0)
1687 return(0);
1688
1689 extra = &ctxt->nsdb->extra[oldIndex];
1690
1691 if (extra->elementId == ctxt->nsdb->elementId) {
1692 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1693 return(0);
1694 }
1695
1696 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1697 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1698 return(0);
1699 }
1700
1701 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1702 goto populate_entry;
1703 }
1704
1705 /*
1706 * Hash table lookup
1707 */
1708 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1709 if (oldIndex != INT_MAX) {
1710 extra = &ctxt->nsdb->extra[oldIndex];
1711
1712 if (defAttr != 0)
1713 return(0);
1714
1715 /*
1716 * Check for duplicate definitions on the same element.
1717 */
1718 if (extra->elementId == ctxt->nsdb->elementId) {
1719 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1720 return(0);
1721 }
1722
1723 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1724 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1725 return(0);
1726
1727 bucket->index = ctxt->nsNr;
1728 goto populate_entry;
1729 }
1730
1731 /*
1732 * Insert new bucket
1733 */
1734
1735 hashValue = prefix->hashValue;
1736
1737 /*
1738 * Grow hash table, 50% fill factor
1739 */
1740 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1741 xmlParserNsBucket *newHash;
1742 unsigned newSize, i, index;
1743
1744 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1745 xmlErrMemory(ctxt, NULL);
1746 return(-1);
1747 }
1748 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1749 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1750 if (newHash == NULL) {
1751 xmlErrMemory(ctxt, NULL);
1752 return(-1);
1753 }
1754 memset(newHash, 0, newSize * sizeof(newHash[0]));
1755
1756 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1757 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1758 unsigned newIndex;
1759
1760 if (hv == 0)
1761 continue;
1762 newIndex = hv & (newSize - 1);
1763
1764 while (newHash[newIndex].hashValue != 0) {
1765 newIndex++;
1766 if (newIndex == newSize)
1767 newIndex = 0;
1768 }
1769
1770 newHash[newIndex] = ctxt->nsdb->hash[i];
1771 }
1772
1773 xmlFree(ctxt->nsdb->hash);
1774 ctxt->nsdb->hash = newHash;
1775 ctxt->nsdb->hashSize = newSize;
1776
1777 /*
1778 * Relookup
1779 */
1780 index = hashValue & (newSize - 1);
1781
1782 while (newHash[index].hashValue != 0) {
1783 index++;
1784 if (index == newSize)
1785 index = 0;
1786 }
1787
1788 bucket = &newHash[index];
1789 }
1790
1791 bucket->hashValue = hashValue;
1792 bucket->index = ctxt->nsNr;
1793 ctxt->nsdb->hashElems++;
1794 oldIndex = INT_MAX;
1795
1796 populate_entry:
1797 nsIndex = ctxt->nsNr;
1798
1799 ns = &ctxt->nsTab[nsIndex * 2];
1800 ns[0] = prefix ? prefix->name : NULL;
1801 ns[1] = uri->name;
1802
1803 extra = &ctxt->nsdb->extra[nsIndex];
1804 extra->saxData = saxData;
1805 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1806 extra->uriHashValue = uri->hashValue;
1807 extra->elementId = ctxt->nsdb->elementId;
1808 extra->oldIndex = oldIndex;
1809
1810 ctxt->nsNr++;
1811
1812 return(1);
1813 }
1814
1815 /**
1816 * xmlParserNsPop:
1817 * @ctxt: an XML parser context
1818 * @nr: the number to pop
1819 *
1820 * Pops the top @nr namespaces and restores the hash table.
1821 *
1822 * Returns the number of namespaces popped.
1823 */
1824 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1825 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1826 {
1827 int i;
1828
1829 /* assert(nr <= ctxt->nsNr); */
1830
1831 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1832 const xmlChar *prefix = ctxt->nsTab[i * 2];
1833 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1834
1835 if (prefix == NULL) {
1836 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1837 } else {
1838 xmlHashedString hprefix;
1839 xmlParserNsBucket *bucket = NULL;
1840
1841 hprefix.name = prefix;
1842 hprefix.hashValue = extra->prefixHashValue;
1843 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1844 /* assert(bucket && bucket->hashValue); */
1845 bucket->index = extra->oldIndex;
1846 }
1847 }
1848
1849 ctxt->nsNr -= nr;
1850 return(nr);
1851 }
1852
1853 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1854 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1855 const xmlChar **atts;
1856 unsigned *attallocs;
1857 int maxatts;
1858
1859 if (nr + 5 > ctxt->maxatts) {
1860 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1861 atts = (const xmlChar **) xmlMalloc(
1862 maxatts * sizeof(const xmlChar *));
1863 if (atts == NULL) goto mem_error;
1864 attallocs = xmlRealloc(ctxt->attallocs,
1865 (maxatts / 5) * sizeof(attallocs[0]));
1866 if (attallocs == NULL) {
1867 xmlFree(atts);
1868 goto mem_error;
1869 }
1870 if (ctxt->maxatts > 0)
1871 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1872 xmlFree(ctxt->atts);
1873 ctxt->atts = atts;
1874 ctxt->attallocs = attallocs;
1875 ctxt->maxatts = maxatts;
1876 }
1877 return(ctxt->maxatts);
1878 mem_error:
1879 xmlErrMemory(ctxt, NULL);
1880 return(-1);
1881 }
1882
1883 /**
1884 * inputPush:
1885 * @ctxt: an XML parser context
1886 * @value: the parser input
1887 *
1888 * Pushes a new parser input on top of the input stack
1889 *
1890 * Returns -1 in case of error, the index in the stack otherwise
1891 */
1892 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1893 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1894 {
1895 if ((ctxt == NULL) || (value == NULL))
1896 return(-1);
1897 if (ctxt->inputNr >= ctxt->inputMax) {
1898 size_t newSize = ctxt->inputMax * 2;
1899 xmlParserInputPtr *tmp;
1900
1901 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1902 newSize * sizeof(*tmp));
1903 if (tmp == NULL) {
1904 xmlErrMemory(ctxt, NULL);
1905 return (-1);
1906 }
1907 ctxt->inputTab = tmp;
1908 ctxt->inputMax = newSize;
1909 }
1910 ctxt->inputTab[ctxt->inputNr] = value;
1911 ctxt->input = value;
1912 return (ctxt->inputNr++);
1913 }
1914 /**
1915 * inputPop:
1916 * @ctxt: an XML parser context
1917 *
1918 * Pops the top parser input from the input stack
1919 *
1920 * Returns the input just removed
1921 */
1922 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1923 inputPop(xmlParserCtxtPtr ctxt)
1924 {
1925 xmlParserInputPtr ret;
1926
1927 if (ctxt == NULL)
1928 return(NULL);
1929 if (ctxt->inputNr <= 0)
1930 return (NULL);
1931 ctxt->inputNr--;
1932 if (ctxt->inputNr > 0)
1933 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1934 else
1935 ctxt->input = NULL;
1936 ret = ctxt->inputTab[ctxt->inputNr];
1937 ctxt->inputTab[ctxt->inputNr] = NULL;
1938 return (ret);
1939 }
1940 /**
1941 * nodePush:
1942 * @ctxt: an XML parser context
1943 * @value: the element node
1944 *
1945 * DEPRECATED: Internal function, do not use.
1946 *
1947 * Pushes a new element node on top of the node stack
1948 *
1949 * Returns -1 in case of error, the index in the stack otherwise
1950 */
1951 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1952 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1953 {
1954 if (ctxt == NULL) return(0);
1955 if (ctxt->nodeNr >= ctxt->nodeMax) {
1956 xmlNodePtr *tmp;
1957
1958 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1959 ctxt->nodeMax * 2 *
1960 sizeof(ctxt->nodeTab[0]));
1961 if (tmp == NULL) {
1962 xmlErrMemory(ctxt, NULL);
1963 return (-1);
1964 }
1965 ctxt->nodeTab = tmp;
1966 ctxt->nodeMax *= 2;
1967 }
1968 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1969 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1970 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1971 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1972 xmlParserMaxDepth);
1973 xmlHaltParser(ctxt);
1974 return(-1);
1975 }
1976 ctxt->nodeTab[ctxt->nodeNr] = value;
1977 ctxt->node = value;
1978 return (ctxt->nodeNr++);
1979 }
1980
1981 /**
1982 * nodePop:
1983 * @ctxt: an XML parser context
1984 *
1985 * DEPRECATED: Internal function, do not use.
1986 *
1987 * Pops the top element node from the node stack
1988 *
1989 * Returns the node just removed
1990 */
1991 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1992 nodePop(xmlParserCtxtPtr ctxt)
1993 {
1994 xmlNodePtr ret;
1995
1996 if (ctxt == NULL) return(NULL);
1997 if (ctxt->nodeNr <= 0)
1998 return (NULL);
1999 ctxt->nodeNr--;
2000 if (ctxt->nodeNr > 0)
2001 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2002 else
2003 ctxt->node = NULL;
2004 ret = ctxt->nodeTab[ctxt->nodeNr];
2005 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2006 return (ret);
2007 }
2008
2009 /**
2010 * nameNsPush:
2011 * @ctxt: an XML parser context
2012 * @value: the element name
2013 * @prefix: the element prefix
2014 * @URI: the element namespace name
2015 * @line: the current line number for error messages
2016 * @nsNr: the number of namespaces pushed on the namespace table
2017 *
2018 * Pushes a new element name/prefix/URL on top of the name stack
2019 *
2020 * Returns -1 in case of error, the index in the stack otherwise
2021 */
2022 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2023 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2024 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2025 {
2026 xmlStartTag *tag;
2027
2028 if (ctxt->nameNr >= ctxt->nameMax) {
2029 const xmlChar * *tmp;
2030 xmlStartTag *tmp2;
2031 ctxt->nameMax *= 2;
2032 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2033 ctxt->nameMax *
2034 sizeof(ctxt->nameTab[0]));
2035 if (tmp == NULL) {
2036 ctxt->nameMax /= 2;
2037 goto mem_error;
2038 }
2039 ctxt->nameTab = tmp;
2040 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2041 ctxt->nameMax *
2042 sizeof(ctxt->pushTab[0]));
2043 if (tmp2 == NULL) {
2044 ctxt->nameMax /= 2;
2045 goto mem_error;
2046 }
2047 ctxt->pushTab = tmp2;
2048 } else if (ctxt->pushTab == NULL) {
2049 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2050 sizeof(ctxt->pushTab[0]));
2051 if (ctxt->pushTab == NULL)
2052 goto mem_error;
2053 }
2054 ctxt->nameTab[ctxt->nameNr] = value;
2055 ctxt->name = value;
2056 tag = &ctxt->pushTab[ctxt->nameNr];
2057 tag->prefix = prefix;
2058 tag->URI = URI;
2059 tag->line = line;
2060 tag->nsNr = nsNr;
2061 return (ctxt->nameNr++);
2062 mem_error:
2063 xmlErrMemory(ctxt, NULL);
2064 return (-1);
2065 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt:  an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. The name
 * below it, if any, becomes the current name of the context, otherwise
 * the current name is set to NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;

    if (ctxt->nameNr <= 0)
        return(NULL);

    ctxt->nameNr -= 1;
    ctxt->name = (ctxt->nameNr > 0) ?
                 ctxt->nameTab[ctxt->nameNr - 1] : NULL;

    popped = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2092
2093 /**
2094 * namePush:
2095 * @ctxt: an XML parser context
2096 * @value: the element name
2097 *
2098 * DEPRECATED: Internal function, do not use.
2099 *
2100 * Pushes a new element name on top of the name stack
2101 *
2102 * Returns -1 in case of error, the index in the stack otherwise
2103 */
2104 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2105 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2106 {
2107 if (ctxt == NULL) return (-1);
2108
2109 if (ctxt->nameNr >= ctxt->nameMax) {
2110 const xmlChar * *tmp;
2111 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2112 ctxt->nameMax * 2 *
2113 sizeof(ctxt->nameTab[0]));
2114 if (tmp == NULL) {
2115 goto mem_error;
2116 }
2117 ctxt->nameTab = tmp;
2118 ctxt->nameMax *= 2;
2119 }
2120 ctxt->nameTab[ctxt->nameNr] = value;
2121 ctxt->name = value;
2122 return (ctxt->nameNr++);
2123 mem_error:
2124 xmlErrMemory(ctxt, NULL);
2125 return (-1);
2126 }
2127
2128 /**
2129 * namePop:
2130 * @ctxt: an XML parser context
2131 *
2132 * DEPRECATED: Internal function, do not use.
2133 *
2134 * Pops the top element name from the name stack
2135 *
2136 * Returns the name just removed
2137 */
2138 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2139 namePop(xmlParserCtxtPtr ctxt)
2140 {
2141 const xmlChar *ret;
2142
2143 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2144 return (NULL);
2145 ctxt->nameNr--;
2146 if (ctxt->nameNr > 0)
2147 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2148 else
2149 ctxt->name = NULL;
2150 ret = ctxt->nameTab[ctxt->nameNr];
2151 ctxt->nameTab[ctxt->nameNr] = NULL;
2152 return (ret);
2153 }
2154
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The stack is doubled when it is full; ctxt->spaceMax
 * is restored if the reallocation fails.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = xmlRealloc(ctxt->spaceTab,
                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2173
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to the first slot of the table when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr -= 1;
    ctxt->space = (ctxt->spaceNr > 0) ?
                  &ctxt->spaceTab[ctxt->spaceNr - 1] : &ctxt->spaceTab[0];

    popped = ctxt->spaceTab[ctxt->spaceNr];
    ctxt->spaceTab[ctxt->spaceNr] = -1;
    return(popped);
}
2186
/*
 * Macros for accessing the content. These should be used only by the
 * parser, and must not be exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2221
2222 #define RAW (*ctxt->input->cur)
2223 #define CUR (*ctxt->input->cur)
2224 #define NXT(val) ctxt->input->cur[(val)]
2225 #define CUR_PTR ctxt->input->cur
2226 #define BASE_PTR ctxt->input->base
2227
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes found at s are
 * equal to c1 ... cn, in that order. Bytes are compared as unsigned char
 * and evaluation stops at the first mismatch.
 *
 * Every parameter is parenthesized so that any pointer expression can be
 * passed as s, and the cast keeps the const qualifier of the input.
 * s is expanded once per compared byte: it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2245
/*
 * SKIP(val): advance the input by val xmlChars and the column counter by
 * the same amount, then call xmlParserGrow() when the new current byte
 * is 0. The line counter is not touched, so this is only meant for
 * newline-free ASCII runs.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): advance the input by val xmlChars one at a time, keeping
 * line and column up to date when a '\n' is crossed, then call
 * xmlParserGrow() when the new current byte is 0.
 *
 * val is parenthesized in the loop condition so that expressions such as
 * a conditional can be passed without being misparsed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2263
/*
 * SHRINK: call xmlParserShrink() when more than 2 * INPUT_CHUNK bytes lie
 * between the buffer base and the current position and fewer than
 * 2 * INPUT_CHUNK bytes remain before the buffer end.
 * Don't shrink push parser buffer: nothing is done when ctxt->progressive
 * is set and there is a single input on the stack.
 *
 * NOTE(review): expands to a bare `if` statement that already ends with
 * ';'. Using it as the unbraced body of an if/else changes the meaning
 * of the enclosing statement.
 */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

/*
 * GROW: call xmlParserGrow() when fewer than INPUT_CHUNK bytes remain
 * between the current position and the buffer end.
 *
 * NOTE(review): same bare-`if` expansion caveat as SHRINK.
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
        xmlParserGrow(ctxt);
2273
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the ctxt in scope. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the input by exactly one xmlChar and the column by one,
 * then call xmlParserGrow() when the new current byte is 0. The line
 * counter is not updated.
 *
 * NOTE(review): expands to a bare `{ ... }` block, so `NEXT1;` followed
 * by `else` does not compile in an unbraced if.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserGrow(ctxt); \
    }
2284
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * Line and column are updated first, based on whether the current byte
 * is '\n', then the input pointer moves forward by l.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): thin wrappers around xmlCurrentChar()
 * and xmlStringCurrentChar(); the address of l is passed as the length
 * output argument.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(b, i, v): append code point v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single byte, anything
 * else goes through xmlCopyCharMultiByte().
 *
 * Arguments are parenthesized so that an expression such as `c & 0x7F`
 * is compared as a whole. The expansion is still an if/else statement
 * without trailing ';' (call sites supply it), and v is expanded more
 * than once, so it must not have side effects.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2298
2299 /**
2300 * xmlSkipBlankChars:
2301 * @ctxt: the XML parser context
2302 *
2303 * DEPRECATED: Internal function, do not use.
2304 *
2305 * skip all blanks character found at that point in the input streams.
2306 * It pops up finished entities in the process if allowable at that point.
2307 *
2308 * Returns the number of space chars skipped
2309 */
2310
2311 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2312 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2313 int res = 0;
2314
2315 /*
2316 * It's Okay to use CUR/NEXT here since all the blanks are on
2317 * the ASCII range.
2318 */
2319 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2320 (ctxt->instate == XML_PARSER_START)) {
2321 const xmlChar *cur;
2322 /*
2323 * if we are in the document content, go really fast
2324 */
2325 cur = ctxt->input->cur;
2326 while (IS_BLANK_CH(*cur)) {
2327 if (*cur == '\n') {
2328 ctxt->input->line++; ctxt->input->col = 1;
2329 } else {
2330 ctxt->input->col++;
2331 }
2332 cur++;
2333 if (res < INT_MAX)
2334 res++;
2335 if (*cur == 0) {
2336 ctxt->input->cur = cur;
2337 xmlParserGrow(ctxt);
2338 cur = ctxt->input->cur;
2339 }
2340 }
2341 ctxt->input->cur = cur;
2342 } else {
2343 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2344
2345 while (ctxt->instate != XML_PARSER_EOF) {
2346 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2347 NEXT;
2348 } else if (CUR == '%') {
2349 /*
2350 * Need to handle support of entities branching here
2351 */
2352 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2353 break;
2354 xmlParsePEReference(ctxt);
2355 } else if (CUR == 0) {
2356 unsigned long consumed;
2357 xmlEntityPtr ent;
2358
2359 if (ctxt->inputNr <= 1)
2360 break;
2361
2362 consumed = ctxt->input->consumed;
2363 xmlSaturatedAddSizeT(&consumed,
2364 ctxt->input->cur - ctxt->input->base);
2365
2366 /*
2367 * Add to sizeentities when parsing an external entity
2368 * for the first time.
2369 */
2370 ent = ctxt->input->entity;
2371 if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2372 ((ent->flags & XML_ENT_PARSED) == 0)) {
2373 ent->flags |= XML_ENT_PARSED;
2374
2375 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2376 }
2377
2378 xmlParserEntityCheck(ctxt, consumed);
2379
2380 xmlPopInput(ctxt);
2381 } else {
2382 break;
2383 }
2384
2385 /*
2386 * Also increase the counter when entering or exiting a PERef.
2387 * The spec says: "When a parameter-entity reference is recognized
2388 * in the DTD and included, its replacement text MUST be enlarged
2389 * by the attachment of one leading and one following space (#x20)
2390 * character."
2391 */
2392 if (res < INT_MAX)
2393 res++;
2394 }
2395 }
2396 return(res);
2397 }
2398
2399 /************************************************************************
2400 * *
2401 * Commodity functions to handle entities *
2402 * *
2403 ************************************************************************/
2404
2405 /**
2406 * xmlPopInput:
2407 * @ctxt: an XML parser context
2408 *
2409 * xmlPopInput: the current input pointed by ctxt->input came to an end
2410 * pop it and return the next char.
2411 *
2412 * Returns the current xmlChar in the parser context
2413 */
2414 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2415 xmlPopInput(xmlParserCtxtPtr ctxt) {
2416 xmlParserInputPtr input;
2417
2418 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "Popping input %d\n", ctxt->inputNr);
2422 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2423 (ctxt->instate != XML_PARSER_EOF))
2424 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2425 "Unfinished entity outside the DTD");
2426 input = inputPop(ctxt);
2427 if (input->entity != NULL)
2428 input->entity->flags &= ~XML_ENT_EXPANDING;
2429 xmlFreeInputStream(input);
2430 if (*ctxt->input->cur == 0)
2431 xmlParserGrow(ctxt);
2432 return(CUR);
2433 }
2434
2435 /**
2436 * xmlPushInput:
2437 * @ctxt: an XML parser context
2438 * @input: an XML parser input fragment (entity, XML fragment ...).
2439 *
2440 * xmlPushInput: switch to a new input stream which is stacked on top
2441 * of the previous one(s).
2442 * Returns -1 in case of error or the index in the input stack
2443 */
2444 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2445 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2446 int ret;
2447 if (input == NULL) return(-1);
2448
2449 if (xmlParserDebugEntities) {
2450 if ((ctxt->input != NULL) && (ctxt->input->filename))
2451 xmlGenericError(xmlGenericErrorContext,
2452 "%s(%d): ", ctxt->input->filename,
2453 ctxt->input->line);
2454 xmlGenericError(xmlGenericErrorContext,
2455 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2456 }
2457 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2458 (ctxt->inputNr > 100)) {
2459 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2460 while (ctxt->inputNr > 1)
2461 xmlFreeInputStream(inputPop(ctxt));
2462 return(-1);
2463 }
2464 ret = inputPush(ctxt, input);
2465 if (ctxt->instate == XML_PARSER_EOF)
2466 return(-1);
2467 GROW;
2468 return(ret);
2469 }
2470
2471 /**
2472 * xmlParseCharRef:
2473 * @ctxt: an XML parser context
2474 *
2475 * DEPRECATED: Internal function, don't use.
2476 *
2477 * Parse a numeric character reference. Always consumes '&'.
2478 *
2479 * [66] CharRef ::= '&#' [0-9]+ ';' |
2480 * '&#x' [0-9a-fA-F]+ ';'
2481 *
2482 * [ WFC: Legal Character ]
2483 * Characters referred to using character references must match the
2484 * production for Char.
2485 *
2486 * Returns the value parsed (as an int), 0 in case of error
2487 */
2488 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2489 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2490 int val = 0;
2491 int count = 0;
2492
2493 /*
2494 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2495 */
2496 if ((RAW == '&') && (NXT(1) == '#') &&
2497 (NXT(2) == 'x')) {
2498 SKIP(3);
2499 GROW;
2500 while (RAW != ';') { /* loop blocked by count */
2501 if (count++ > 20) {
2502 count = 0;
2503 GROW;
2504 if (ctxt->instate == XML_PARSER_EOF)
2505 return(0);
2506 }
2507 if ((RAW >= '0') && (RAW <= '9'))
2508 val = val * 16 + (CUR - '0');
2509 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2510 val = val * 16 + (CUR - 'a') + 10;
2511 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2512 val = val * 16 + (CUR - 'A') + 10;
2513 else {
2514 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2515 val = 0;
2516 break;
2517 }
2518 if (val > 0x110000)
2519 val = 0x110000;
2520
2521 NEXT;
2522 count++;
2523 }
2524 if (RAW == ';') {
2525 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2526 ctxt->input->col++;
2527 ctxt->input->cur++;
2528 }
2529 } else if ((RAW == '&') && (NXT(1) == '#')) {
2530 SKIP(2);
2531 GROW;
2532 while (RAW != ';') { /* loop blocked by count */
2533 if (count++ > 20) {
2534 count = 0;
2535 GROW;
2536 if (ctxt->instate == XML_PARSER_EOF)
2537 return(0);
2538 }
2539 if ((RAW >= '0') && (RAW <= '9'))
2540 val = val * 10 + (CUR - '0');
2541 else {
2542 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2543 val = 0;
2544 break;
2545 }
2546 if (val > 0x110000)
2547 val = 0x110000;
2548
2549 NEXT;
2550 count++;
2551 }
2552 if (RAW == ';') {
2553 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2554 ctxt->input->col++;
2555 ctxt->input->cur++;
2556 }
2557 } else {
2558 if (RAW == '&')
2559 SKIP(1);
2560 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2561 }
2562
2563 /*
2564 * [ WFC: Legal Character ]
2565 * Characters referred to using character references must match the
2566 * production for Char.
2567 */
2568 if (val >= 0x110000) {
2569 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2570 "xmlParseCharRef: character reference out of bounds\n",
2571 val);
2572 } else if (IS_CHAR(val)) {
2573 return(val);
2574 } else {
2575 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2576 "xmlParseCharRef: invalid xmlChar value %d\n",
2577 val);
2578 }
2579 return(0);
2580 }
2581
2582 /**
2583 * xmlParseStringCharRef:
2584 * @ctxt: an XML parser context
2585 * @str: a pointer to an index in the string
2586 *
2587 * parse Reference declarations, variant parsing from a string rather
2588 * than an an input flow.
2589 *
2590 * [66] CharRef ::= '&#' [0-9]+ ';' |
2591 * '&#x' [0-9a-fA-F]+ ';'
2592 *
2593 * [ WFC: Legal Character ]
2594 * Characters referred to using character references must match the
2595 * production for Char.
2596 *
2597 * Returns the value parsed (as an int), 0 in case of error, str will be
2598 * updated to the current value of the index
2599 */
2600 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2601 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2602 const xmlChar *ptr;
2603 xmlChar cur;
2604 int val = 0;
2605
2606 if ((str == NULL) || (*str == NULL)) return(0);
2607 ptr = *str;
2608 cur = *ptr;
2609 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2610 ptr += 3;
2611 cur = *ptr;
2612 while (cur != ';') { /* Non input consuming loop */
2613 if ((cur >= '0') && (cur <= '9'))
2614 val = val * 16 + (cur - '0');
2615 else if ((cur >= 'a') && (cur <= 'f'))
2616 val = val * 16 + (cur - 'a') + 10;
2617 else if ((cur >= 'A') && (cur <= 'F'))
2618 val = val * 16 + (cur - 'A') + 10;
2619 else {
2620 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2621 val = 0;
2622 break;
2623 }
2624 if (val > 0x110000)
2625 val = 0x110000;
2626
2627 ptr++;
2628 cur = *ptr;
2629 }
2630 if (cur == ';')
2631 ptr++;
2632 } else if ((cur == '&') && (ptr[1] == '#')){
2633 ptr += 2;
2634 cur = *ptr;
2635 while (cur != ';') { /* Non input consuming loops */
2636 if ((cur >= '0') && (cur <= '9'))
2637 val = val * 10 + (cur - '0');
2638 else {
2639 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2640 val = 0;
2641 break;
2642 }
2643 if (val > 0x110000)
2644 val = 0x110000;
2645
2646 ptr++;
2647 cur = *ptr;
2648 }
2649 if (cur == ';')
2650 ptr++;
2651 } else {
2652 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2653 return(0);
2654 }
2655 *str = ptr;
2656
2657 /*
2658 * [ WFC: Legal Character ]
2659 * Characters referred to using character references must match the
2660 * production for Char.
2661 */
2662 if (val >= 0x110000) {
2663 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2664 "xmlParseStringCharRef: character reference out of bounds\n",
2665 val);
2666 } else if (IS_CHAR(val)) {
2667 return(val);
2668 } else {
2669 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2670 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2671 val);
2672 }
2673 return(0);
2674 }
2675
2676 /**
2677 * xmlParserHandlePEReference:
2678 * @ctxt: the parser context
2679 *
2680 * DEPRECATED: Internal function, do not use.
2681 *
2682 * [69] PEReference ::= '%' Name ';'
2683 *
2684 * [ WFC: No Recursion ]
2685 * A parsed entity must not contain a recursive
2686 * reference to itself, either directly or indirectly.
2687 *
2688 * [ WFC: Entity Declared ]
2689 * In a document without any DTD, a document with only an internal DTD
2690 * subset which contains no parameter entity references, or a document
2691 * with "standalone='yes'", ... ... The declaration of a parameter
2692 * entity must precede any reference to it...
2693 *
2694 * [ VC: Entity Declared ]
2695 * In a document with an external subset or external parameter entities
2696 * with "standalone='no'", ... ... The declaration of a parameter entity
2697 * must precede any reference to it...
2698 *
2699 * [ WFC: In DTD ]
2700 * Parameter-entity references may only appear in the DTD.
2701 * NOTE: misleading but this is handled.
2702 *
2703 * A PEReference may have been detected in the current input stream
2704 * the handling is done accordingly to
2705 * http://www.w3.org/TR/REC-xml#entproc
2706 * i.e.
2707 * - Included in literal in entity values
2708 * - Included as Parameter Entity reference within DTDs
2709 */
2710 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2711 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2712 switch(ctxt->instate) {
2713 case XML_PARSER_CDATA_SECTION:
2714 return;
2715 case XML_PARSER_COMMENT:
2716 return;
2717 case XML_PARSER_START_TAG:
2718 return;
2719 case XML_PARSER_END_TAG:
2720 return;
2721 case XML_PARSER_EOF:
2722 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2723 return;
2724 case XML_PARSER_PROLOG:
2725 case XML_PARSER_START:
2726 case XML_PARSER_XML_DECL:
2727 case XML_PARSER_MISC:
2728 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2729 return;
2730 case XML_PARSER_ENTITY_DECL:
2731 case XML_PARSER_CONTENT:
2732 case XML_PARSER_ATTRIBUTE_VALUE:
2733 case XML_PARSER_PI:
2734 case XML_PARSER_SYSTEM_LITERAL:
2735 case XML_PARSER_PUBLIC_LITERAL:
2736 /* we just ignore it there */
2737 return;
2738 case XML_PARSER_EPILOG:
2739 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2740 return;
2741 case XML_PARSER_ENTITY_VALUE:
2742 /*
2743 * NOTE: in the case of entity values, we don't do the
2744 * substitution here since we need the literal
2745 * entity value to be able to save the internal
2746 * subset of the document.
2747 * This will be handled by xmlStringDecodeEntities
2748 */
2749 return;
2750 case XML_PARSER_DTD:
2751 /*
2752 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2753 * In the internal DTD subset, parameter-entity references
2754 * can occur only where markup declarations can occur, not
2755 * within markup declarations.
2756 * In that case this is handled in xmlParseMarkupDecl
2757 */
2758 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2759 return;
2760 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2761 return;
2762 break;
2763 case XML_PARSER_IGNORE:
2764 return;
2765 }
2766
2767 xmlParsePEReference(ctxt);
2768 }
2769
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t and is updated on success
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure the buffer
 * and buffer##_size are left untouched.
 * n is parenthesized so that any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2784
/**
 * xmlStringDecodeEntitiesInt:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len:  the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 * @check:  whether to perform entity checks
 *
 * Copy the first @len bytes of @str into a newly allocated buffer while
 * replacing references: character references are always decoded, general
 * entity references are replaced when XML_SUBSTITUTE_REF is set in @what
 * and parameter entity references when XML_SUBSTITUTE_PEREF is set.
 * Entity content is decoded by calling this function recursively with
 * ctxt->depth incremented. Decoding stops at a 0 character, at one of
 * the end markers, or when the parser reaches XML_PARSER_EOF.
 *
 * Returns the new 0 terminated buffer, or NULL when @str is NULL, the
 *         nesting depth limit is exceeded, a reference cannot be decoded
 *         or memory allocation fails.
 */
static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                           int what, xmlChar end, xmlChar end2, xmlChar end3,
                           int check) {
    xmlChar *buffer = NULL;
    size_t buffer_size = 0;
    size_t nbchars = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;

    if (str == NULL)
        return(NULL);
    last = str + len;

    /* Depth limit: 40 levels, 100 when XML_PARSE_HUGE is set. */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 100)) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
                       "Maximum entity nesting depth exceeded");
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* Character reference: decoded whatever @what contains. */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            COPY_BUF(buffer, nbchars, val);
            /* Keep XML_PARSER_BUFFER_SIZE bytes free after each write. */
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* Predefined entity: copy the first byte of its content. */
                if (ent->content != NULL) {
                    COPY_BUF(buffer, nbchars, ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /*
                 * The flag is set while the entity is being decoded
                 * below, finding it here means the entity references
                 * itself. The content is emptied before bailing out.
                 */
                if (ent->flags & XML_ENT_EXPANDING) {
                    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    ent->content[0] = 0;
                    goto int_error;
                }

                ent->flags |= XML_ENT_EXPANDING;
                ctxt->depth++;
                rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
                ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

                if (rep == NULL) {
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* Append the decoded replacement text byte by byte. */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* Entity without content: emit the reference "&name;". */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEReferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }

                if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /* Same self-reference guard as for general entities. */
                if (ent->flags & XML_ENT_EXPANDING) {
                    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }

                /*
                 * ent->content may still be NULL at this point; the
                 * recursive call then returns NULL and the whole
                 * decoding fails through int_error.
                 */
                ent->flags |= XML_ENT_EXPANDING;
                ctxt->depth++;
                rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
                ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

                if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character: copy it and step over its l bytes. */
            COPY_BUF(buffer, nbchars, c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        /* Fetch the next character, never reading at or past last. */
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error reports the failure, then shares the cleanup below. */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2993
2994 /**
2995 * xmlStringLenDecodeEntities:
2996 * @ctxt: the parser context
2997 * @str: the input string
2998 * @len: the string length
2999 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3000 * @end: an end marker xmlChar, 0 if none
3001 * @end2: an end marker xmlChar, 0 if none
3002 * @end3: an end marker xmlChar, 0 if none
3003 *
3004 * DEPRECATED: Internal function, don't use.
3005 *
3006 * Takes a entity string content and process to do the adequate substitutions.
3007 *
3008 * [67] Reference ::= EntityRef | CharRef
3009 *
3010 * [69] PEReference ::= '%' Name ';'
3011 *
3012 * Returns A newly allocated string with the substitution done. The caller
3013 * must deallocate it !
3014 */
3015 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)3016 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3017 int what, xmlChar end, xmlChar end2,
3018 xmlChar end3) {
3019 if ((ctxt == NULL) || (str == NULL) || (len < 0))
3020 return(NULL);
3021 return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
3022 end, end2, end3, 0));
3023 }
3024
3025 /**
3026 * xmlStringDecodeEntities:
3027 * @ctxt: the parser context
3028 * @str: the input string
3029 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3030 * @end: an end marker xmlChar, 0 if none
3031 * @end2: an end marker xmlChar, 0 if none
3032 * @end3: an end marker xmlChar, 0 if none
3033 *
3034 * DEPRECATED: Internal function, don't use.
3035 *
3036 * Takes a entity string content and process to do the adequate substitutions.
3037 *
3038 * [67] Reference ::= EntityRef | CharRef
3039 *
3040 * [69] PEReference ::= '%' Name ';'
3041 *
3042 * Returns A newly allocated string with the substitution done. The caller
3043 * must deallocate it !
3044 */
3045 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)3046 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
3047 xmlChar end, xmlChar end2, xmlChar end3) {
3048 if ((ctxt == NULL) || (str == NULL)) return(NULL);
3049 return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
3050 end, end2, end3, 0));
3051 }
3052
3053 /************************************************************************
3054 * *
3055 * Commodity functions, cleanup needed ? *
3056 * *
3057 ************************************************************************/
3058
3059 /**
3060 * areBlanks:
3061 * @ctxt: an XML parser context
3062 * @str: a xmlChar *
3063 * @len: the size of @str
3064 * @blank_chars: we know the chars are blanks
3065 *
3066 * Is this a sequence of blank chars that one can ignore ?
3067 *
3068 * Returns 1 if ignorable 0 otherwise.
3069 */
3070
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)3071 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3072 int blank_chars) {
3073 int i, ret;
3074 xmlNodePtr lastChild;
3075
3076 /*
3077 * Don't spend time trying to differentiate them, the same callback is
3078 * used !
3079 */
3080 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
3081 return(0);
3082
3083 /*
3084 * Check for xml:space value.
3085 */
3086 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
3087 (*(ctxt->space) == -2))
3088 return(0);
3089
3090 /*
3091 * Check that the string is made of blanks
3092 */
3093 if (blank_chars == 0) {
3094 for (i = 0;i < len;i++)
3095 if (!(IS_BLANK_CH(str[i]))) return(0);
3096 }
3097
3098 /*
3099 * Look if the element is mixed content in the DTD if available
3100 */
3101 if (ctxt->node == NULL) return(0);
3102 if (ctxt->myDoc != NULL) {
3103 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3104 if (ret == 0) return(1);
3105 if (ret == 1) return(0);
3106 }
3107
3108 /*
3109 * Otherwise, heuristic :-\
3110 */
3111 if ((RAW != '<') && (RAW != 0xD)) return(0);
3112 if ((ctxt->node->children == NULL) &&
3113 (RAW == '<') && (NXT(1) == '/')) return(0);
3114
3115 lastChild = xmlGetLastChild(ctxt->node);
3116 if (lastChild == NULL) {
3117 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3118 (ctxt->node->content != NULL)) return(0);
3119 } else if (xmlNodeIsText(lastChild))
3120 return(0);
3121 else if ((ctxt->node->children != NULL) &&
3122 (xmlNodeIsText(ctxt->node->children)))
3123 return(0);
3124 return(1);
3125 }
3126
3127 /************************************************************************
3128 * *
3129 * Extra stuff for namespace support *
3130 * Relates to http://www.w3.org/TR/WD-xml-names *
3131 * *
3132 ************************************************************************/
3133
3134 /**
3135 * xmlSplitQName:
3136 * @ctxt: an XML parser context
3137 * @name: an XML parser context
3138 * @prefix: a xmlChar **
3139 *
3140 * parse an UTF8 encoded XML qualified name string
3141 *
3142 * [NS 5] QName ::= (Prefix ':')? LocalPart
3143 *
3144 * [NS 6] Prefix ::= NCName
3145 *
3146 * [NS 7] LocalPart ::= NCName
3147 *
3148 * Returns the local part, and prefix is updated
3149 * to get the Prefix if any.
3150 */
3151
3152 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)3153 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3154 xmlChar buf[XML_MAX_NAMELEN + 5];
3155 xmlChar *buffer = NULL;
3156 int len = 0;
3157 int max = XML_MAX_NAMELEN;
3158 xmlChar *ret = NULL;
3159 const xmlChar *cur = name;
3160 int c;
3161
3162 if (prefix == NULL) return(NULL);
3163 *prefix = NULL;
3164
3165 if (cur == NULL) return(NULL);
3166
3167 #ifndef XML_XML_NAMESPACE
3168 /* xml: prefix is not really a namespace */
3169 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3170 (cur[2] == 'l') && (cur[3] == ':'))
3171 return(xmlStrdup(name));
3172 #endif
3173
3174 /* nasty but well=formed */
3175 if (cur[0] == ':')
3176 return(xmlStrdup(name));
3177
3178 c = *cur++;
3179 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3180 buf[len++] = c;
3181 c = *cur++;
3182 }
3183 if (len >= max) {
3184 /*
3185 * Okay someone managed to make a huge name, so he's ready to pay
3186 * for the processing speed.
3187 */
3188 max = len * 2;
3189
3190 buffer = (xmlChar *) xmlMallocAtomic(max);
3191 if (buffer == NULL) {
3192 xmlErrMemory(ctxt, NULL);
3193 return(NULL);
3194 }
3195 memcpy(buffer, buf, len);
3196 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3197 if (len + 10 > max) {
3198 xmlChar *tmp;
3199
3200 max *= 2;
3201 tmp = (xmlChar *) xmlRealloc(buffer, max);
3202 if (tmp == NULL) {
3203 xmlFree(buffer);
3204 xmlErrMemory(ctxt, NULL);
3205 return(NULL);
3206 }
3207 buffer = tmp;
3208 }
3209 buffer[len++] = c;
3210 c = *cur++;
3211 }
3212 buffer[len] = 0;
3213 }
3214
3215 if ((c == ':') && (*cur == 0)) {
3216 if (buffer != NULL)
3217 xmlFree(buffer);
3218 *prefix = NULL;
3219 return(xmlStrdup(name));
3220 }
3221
3222 if (buffer == NULL)
3223 ret = xmlStrndup(buf, len);
3224 else {
3225 ret = buffer;
3226 buffer = NULL;
3227 max = XML_MAX_NAMELEN;
3228 }
3229
3230
3231 if (c == ':') {
3232 c = *cur;
3233 *prefix = ret;
3234 if (c == 0) {
3235 return(xmlStrndup(BAD_CAST "", 0));
3236 }
3237 len = 0;
3238
3239 /*
3240 * Check that the first character is proper to start
3241 * a new name
3242 */
3243 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3244 ((c >= 0x41) && (c <= 0x5A)) ||
3245 (c == '_') || (c == ':'))) {
3246 int l;
3247 int first = CUR_SCHAR(cur, l);
3248
3249 if (!IS_LETTER(first) && (first != '_')) {
3250 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3251 "Name %s is not XML Namespace compliant\n",
3252 name);
3253 }
3254 }
3255 cur++;
3256
3257 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3258 buf[len++] = c;
3259 c = *cur++;
3260 }
3261 if (len >= max) {
3262 /*
3263 * Okay someone managed to make a huge name, so he's ready to pay
3264 * for the processing speed.
3265 */
3266 max = len * 2;
3267
3268 buffer = (xmlChar *) xmlMallocAtomic(max);
3269 if (buffer == NULL) {
3270 xmlErrMemory(ctxt, NULL);
3271 return(NULL);
3272 }
3273 memcpy(buffer, buf, len);
3274 while (c != 0) { /* tested bigname2.xml */
3275 if (len + 10 > max) {
3276 xmlChar *tmp;
3277
3278 max *= 2;
3279 tmp = (xmlChar *) xmlRealloc(buffer, max);
3280 if (tmp == NULL) {
3281 xmlErrMemory(ctxt, NULL);
3282 xmlFree(buffer);
3283 return(NULL);
3284 }
3285 buffer = tmp;
3286 }
3287 buffer[len++] = c;
3288 c = *cur++;
3289 }
3290 buffer[len] = 0;
3291 }
3292
3293 if (buffer == NULL)
3294 ret = xmlStrndup(buf, len);
3295 else {
3296 ret = buffer;
3297 }
3298 }
3299
3300 return(ret);
3301 }
3302
3303 /************************************************************************
3304 * *
3305 * The parser itself *
3306 * Relates to http://www.w3.org/TR/REC-xml *
3307 * *
3308 ************************************************************************/
3309
3310 /************************************************************************
3311 * *
3312 * Routines to parse Name, NCName and NmToken *
3313 * *
3314 ************************************************************************/
3315
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision-5 parsing semantics if
 * the XML_PARSE_OLD10 option is given to the parser.
 */
3326 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3327 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3328 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3329 /*
3330 * Use the new checks of production [4] [4a] amd [5] of the
3331 * Update 5 of XML-1.0
3332 */
3333 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3334 (((c >= 'a') && (c <= 'z')) ||
3335 ((c >= 'A') && (c <= 'Z')) ||
3336 (c == '_') || (c == ':') ||
3337 ((c >= 0xC0) && (c <= 0xD6)) ||
3338 ((c >= 0xD8) && (c <= 0xF6)) ||
3339 ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 ((c >= 0x370) && (c <= 0x37D)) ||
3341 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3342 ((c >= 0x200C) && (c <= 0x200D)) ||
3343 ((c >= 0x2070) && (c <= 0x218F)) ||
3344 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3345 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3346 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3347 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3348 ((c >= 0x10000) && (c <= 0xEFFFF))))
3349 return(1);
3350 } else {
3351 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3352 return(1);
3353 }
3354 return(0);
3355 }
3356
3357 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3358 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3359 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3360 /*
3361 * Use the new checks of production [4] [4a] amd [5] of the
3362 * Update 5 of XML-1.0
3363 */
3364 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3365 (((c >= 'a') && (c <= 'z')) ||
3366 ((c >= 'A') && (c <= 'Z')) ||
3367 ((c >= '0') && (c <= '9')) || /* !start */
3368 (c == '_') || (c == ':') ||
3369 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3370 ((c >= 0xC0) && (c <= 0xD6)) ||
3371 ((c >= 0xD8) && (c <= 0xF6)) ||
3372 ((c >= 0xF8) && (c <= 0x2FF)) ||
3373 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3374 ((c >= 0x370) && (c <= 0x37D)) ||
3375 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3376 ((c >= 0x200C) && (c <= 0x200D)) ||
3377 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3378 ((c >= 0x2070) && (c <= 0x218F)) ||
3379 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3380 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3381 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3382 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3383 ((c >= 0x10000) && (c <= 0xEFFFF))))
3384 return(1);
3385 } else {
3386 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3387 (c == '.') || (c == '-') ||
3388 (c == '_') || (c == ':') ||
3389 (IS_COMBINING(c)) ||
3390 (IS_EXTENDER(c)))
3391 return(1);
3392 }
3393 return(0);
3394 }
3395
/*
 * Forward declaration: xmlParseAttValue() below calls this helper before
 * its definition appears in the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
          int *len, int *alloc, int normalize);
3398
3399 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3400 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3401 int len = 0, l;
3402 int c;
3403 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404 XML_MAX_TEXT_LENGTH :
3405 XML_MAX_NAME_LENGTH;
3406
3407 /*
3408 * Handler for more complex cases
3409 */
3410 c = CUR_CHAR(l);
3411 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3412 /*
3413 * Use the new checks of production [4] [4a] amd [5] of the
3414 * Update 5 of XML-1.0
3415 */
3416 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3417 (!(((c >= 'a') && (c <= 'z')) ||
3418 ((c >= 'A') && (c <= 'Z')) ||
3419 (c == '_') || (c == ':') ||
3420 ((c >= 0xC0) && (c <= 0xD6)) ||
3421 ((c >= 0xD8) && (c <= 0xF6)) ||
3422 ((c >= 0xF8) && (c <= 0x2FF)) ||
3423 ((c >= 0x370) && (c <= 0x37D)) ||
3424 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3425 ((c >= 0x200C) && (c <= 0x200D)) ||
3426 ((c >= 0x2070) && (c <= 0x218F)) ||
3427 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3428 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3429 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3430 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3431 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3432 return(NULL);
3433 }
3434 len += l;
3435 NEXTL(l);
3436 c = CUR_CHAR(l);
3437 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3438 (((c >= 'a') && (c <= 'z')) ||
3439 ((c >= 'A') && (c <= 'Z')) ||
3440 ((c >= '0') && (c <= '9')) || /* !start */
3441 (c == '_') || (c == ':') ||
3442 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3443 ((c >= 0xC0) && (c <= 0xD6)) ||
3444 ((c >= 0xD8) && (c <= 0xF6)) ||
3445 ((c >= 0xF8) && (c <= 0x2FF)) ||
3446 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3447 ((c >= 0x370) && (c <= 0x37D)) ||
3448 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3449 ((c >= 0x200C) && (c <= 0x200D)) ||
3450 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3451 ((c >= 0x2070) && (c <= 0x218F)) ||
3452 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3453 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3454 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3455 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3456 ((c >= 0x10000) && (c <= 0xEFFFF))
3457 )) {
3458 if (len <= INT_MAX - l)
3459 len += l;
3460 NEXTL(l);
3461 c = CUR_CHAR(l);
3462 }
3463 } else {
3464 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3465 (!IS_LETTER(c) && (c != '_') &&
3466 (c != ':'))) {
3467 return(NULL);
3468 }
3469 len += l;
3470 NEXTL(l);
3471 c = CUR_CHAR(l);
3472
3473 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3474 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3475 (c == '.') || (c == '-') ||
3476 (c == '_') || (c == ':') ||
3477 (IS_COMBINING(c)) ||
3478 (IS_EXTENDER(c)))) {
3479 if (len <= INT_MAX - l)
3480 len += l;
3481 NEXTL(l);
3482 c = CUR_CHAR(l);
3483 }
3484 }
3485 if (ctxt->instate == XML_PARSER_EOF)
3486 return(NULL);
3487 if (len > maxLength) {
3488 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3489 return(NULL);
3490 }
3491 if (ctxt->input->cur - ctxt->input->base < len) {
3492 /*
3493 * There were a couple of bugs where PERefs lead to to a change
3494 * of the buffer. Check the buffer size to avoid passing an invalid
3495 * pointer to xmlDictLookup.
3496 */
3497 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3498 "unexpected change of input buffer");
3499 return (NULL);
3500 }
3501 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3502 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3503 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3504 }
3505
3506 /**
3507 * xmlParseName:
3508 * @ctxt: an XML parser context
3509 *
3510 * DEPRECATED: Internal function, don't use.
3511 *
3512 * parse an XML name.
3513 *
3514 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3515 * CombiningChar | Extender
3516 *
3517 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3518 *
3519 * [6] Names ::= Name (#x20 Name)*
3520 *
3521 * Returns the Name parsed or NULL
3522 */
3523
3524 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3525 xmlParseName(xmlParserCtxtPtr ctxt) {
3526 const xmlChar *in;
3527 const xmlChar *ret;
3528 size_t count = 0;
3529 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3530 XML_MAX_TEXT_LENGTH :
3531 XML_MAX_NAME_LENGTH;
3532
3533 GROW;
3534 if (ctxt->instate == XML_PARSER_EOF)
3535 return(NULL);
3536
3537 /*
3538 * Accelerator for simple ASCII names
3539 */
3540 in = ctxt->input->cur;
3541 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3542 ((*in >= 0x41) && (*in <= 0x5A)) ||
3543 (*in == '_') || (*in == ':')) {
3544 in++;
3545 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3546 ((*in >= 0x41) && (*in <= 0x5A)) ||
3547 ((*in >= 0x30) && (*in <= 0x39)) ||
3548 (*in == '_') || (*in == '-') ||
3549 (*in == ':') || (*in == '.'))
3550 in++;
3551 if ((*in > 0) && (*in < 0x80)) {
3552 count = in - ctxt->input->cur;
3553 if (count > maxLength) {
3554 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3555 return(NULL);
3556 }
3557 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3558 ctxt->input->cur = in;
3559 ctxt->input->col += count;
3560 if (ret == NULL)
3561 xmlErrMemory(ctxt, NULL);
3562 return(ret);
3563 }
3564 }
3565 /* accelerator for special cases */
3566 return(xmlParseNameComplex(ctxt));
3567 }
3568
3569 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3570 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3571 xmlHashedString ret;
3572 int len = 0, l;
3573 int c;
3574 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3575 XML_MAX_TEXT_LENGTH :
3576 XML_MAX_NAME_LENGTH;
3577 size_t startPosition = 0;
3578
3579 ret.name = NULL;
3580 ret.hashValue = 0;
3581
3582 /*
3583 * Handler for more complex cases
3584 */
3585 startPosition = CUR_PTR - BASE_PTR;
3586 c = CUR_CHAR(l);
3587 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3588 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3589 return(ret);
3590 }
3591
3592 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3593 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3594 if (len <= INT_MAX - l)
3595 len += l;
3596 NEXTL(l);
3597 c = CUR_CHAR(l);
3598 }
3599 if (ctxt->instate == XML_PARSER_EOF)
3600 return(ret);
3601 if (len > maxLength) {
3602 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3603 return(ret);
3604 }
3605 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3606 return(ret);
3607 }
3608
3609 /**
3610 * xmlParseNCName:
3611 * @ctxt: an XML parser context
3612 * @len: length of the string parsed
3613 *
3614 * parse an XML name.
3615 *
3616 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3617 * CombiningChar | Extender
3618 *
3619 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3620 *
3621 * Returns the Name parsed or NULL
3622 */
3623
3624 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3625 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3626 const xmlChar *in, *e;
3627 xmlHashedString ret;
3628 size_t count = 0;
3629 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3630 XML_MAX_TEXT_LENGTH :
3631 XML_MAX_NAME_LENGTH;
3632
3633 ret.name = NULL;
3634
3635 /*
3636 * Accelerator for simple ASCII names
3637 */
3638 in = ctxt->input->cur;
3639 e = ctxt->input->end;
3640 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3641 ((*in >= 0x41) && (*in <= 0x5A)) ||
3642 (*in == '_')) && (in < e)) {
3643 in++;
3644 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3645 ((*in >= 0x41) && (*in <= 0x5A)) ||
3646 ((*in >= 0x30) && (*in <= 0x39)) ||
3647 (*in == '_') || (*in == '-') ||
3648 (*in == '.')) && (in < e))
3649 in++;
3650 if (in >= e)
3651 goto complex;
3652 if ((*in > 0) && (*in < 0x80)) {
3653 count = in - ctxt->input->cur;
3654 if (count > maxLength) {
3655 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3656 return(ret);
3657 }
3658 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3659 ctxt->input->cur = in;
3660 ctxt->input->col += count;
3661 if (ret.name == NULL) {
3662 xmlErrMemory(ctxt, NULL);
3663 }
3664 return(ret);
3665 }
3666 }
3667 complex:
3668 return(xmlParseNCNameComplex(ctxt));
3669 }
3670
3671 /**
3672 * xmlParseNameAndCompare:
3673 * @ctxt: an XML parser context
3674 *
3675 * parse an XML name and compares for match
3676 * (specialized for endtag parsing)
3677 *
3678 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3679 * and the name for mismatch
3680 */
3681
3682 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3683 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3684 register const xmlChar *cmp = other;
3685 register const xmlChar *in;
3686 const xmlChar *ret;
3687
3688 GROW;
3689 if (ctxt->instate == XML_PARSER_EOF)
3690 return(NULL);
3691
3692 in = ctxt->input->cur;
3693 while (*in != 0 && *in == *cmp) {
3694 ++in;
3695 ++cmp;
3696 }
3697 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3698 /* success */
3699 ctxt->input->col += in - ctxt->input->cur;
3700 ctxt->input->cur = in;
3701 return (const xmlChar*) 1;
3702 }
3703 /* failure (or end of input buffer), check with full function */
3704 ret = xmlParseName (ctxt);
3705 /* strings coming from the dictionary direct compare possible */
3706 if (ret == other) {
3707 return (const xmlChar*) 1;
3708 }
3709 return ret;
3710 }
3711
3712 /**
3713 * xmlParseStringName:
3714 * @ctxt: an XML parser context
3715 * @str: a pointer to the string pointer (IN/OUT)
3716 *
3717 * parse an XML name.
3718 *
3719 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3720 * CombiningChar | Extender
3721 *
3722 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3723 *
3724 * [6] Names ::= Name (#x20 Name)*
3725 *
3726 * Returns the Name parsed or NULL. The @str pointer
3727 * is updated to the current location in the string.
3728 */
3729
3730 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3731 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3732 xmlChar buf[XML_MAX_NAMELEN + 5];
3733 const xmlChar *cur = *str;
3734 int len = 0, l;
3735 int c;
3736 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3737 XML_MAX_TEXT_LENGTH :
3738 XML_MAX_NAME_LENGTH;
3739
3740 c = CUR_SCHAR(cur, l);
3741 if (!xmlIsNameStartChar(ctxt, c)) {
3742 return(NULL);
3743 }
3744
3745 COPY_BUF(buf, len, c);
3746 cur += l;
3747 c = CUR_SCHAR(cur, l);
3748 while (xmlIsNameChar(ctxt, c)) {
3749 COPY_BUF(buf, len, c);
3750 cur += l;
3751 c = CUR_SCHAR(cur, l);
3752 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3753 /*
3754 * Okay someone managed to make a huge name, so he's ready to pay
3755 * for the processing speed.
3756 */
3757 xmlChar *buffer;
3758 int max = len * 2;
3759
3760 buffer = (xmlChar *) xmlMallocAtomic(max);
3761 if (buffer == NULL) {
3762 xmlErrMemory(ctxt, NULL);
3763 return(NULL);
3764 }
3765 memcpy(buffer, buf, len);
3766 while (xmlIsNameChar(ctxt, c)) {
3767 if (len + 10 > max) {
3768 xmlChar *tmp;
3769
3770 max *= 2;
3771 tmp = (xmlChar *) xmlRealloc(buffer, max);
3772 if (tmp == NULL) {
3773 xmlErrMemory(ctxt, NULL);
3774 xmlFree(buffer);
3775 return(NULL);
3776 }
3777 buffer = tmp;
3778 }
3779 COPY_BUF(buffer, len, c);
3780 cur += l;
3781 c = CUR_SCHAR(cur, l);
3782 if (len > maxLength) {
3783 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3784 xmlFree(buffer);
3785 return(NULL);
3786 }
3787 }
3788 buffer[len] = 0;
3789 *str = cur;
3790 return(buffer);
3791 }
3792 }
3793 if (len > maxLength) {
3794 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3795 return(NULL);
3796 }
3797 *str = cur;
3798 return(xmlStrndup(buf, len));
3799 }
3800
3801 /**
3802 * xmlParseNmtoken:
3803 * @ctxt: an XML parser context
3804 *
3805 * DEPRECATED: Internal function, don't use.
3806 *
3807 * parse an XML Nmtoken.
3808 *
3809 * [7] Nmtoken ::= (NameChar)+
3810 *
3811 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3812 *
3813 * Returns the Nmtoken parsed or NULL
3814 */
3815
3816 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3817 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3818 xmlChar buf[XML_MAX_NAMELEN + 5];
3819 int len = 0, l;
3820 int c;
3821 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3822 XML_MAX_TEXT_LENGTH :
3823 XML_MAX_NAME_LENGTH;
3824
3825 c = CUR_CHAR(l);
3826
3827 while (xmlIsNameChar(ctxt, c)) {
3828 COPY_BUF(buf, len, c);
3829 NEXTL(l);
3830 c = CUR_CHAR(l);
3831 if (len >= XML_MAX_NAMELEN) {
3832 /*
3833 * Okay someone managed to make a huge token, so he's ready to pay
3834 * for the processing speed.
3835 */
3836 xmlChar *buffer;
3837 int max = len * 2;
3838
3839 buffer = (xmlChar *) xmlMallocAtomic(max);
3840 if (buffer == NULL) {
3841 xmlErrMemory(ctxt, NULL);
3842 return(NULL);
3843 }
3844 memcpy(buffer, buf, len);
3845 while (xmlIsNameChar(ctxt, c)) {
3846 if (len + 10 > max) {
3847 xmlChar *tmp;
3848
3849 max *= 2;
3850 tmp = (xmlChar *) xmlRealloc(buffer, max);
3851 if (tmp == NULL) {
3852 xmlErrMemory(ctxt, NULL);
3853 xmlFree(buffer);
3854 return(NULL);
3855 }
3856 buffer = tmp;
3857 }
3858 COPY_BUF(buffer, len, c);
3859 if (len > maxLength) {
3860 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3861 xmlFree(buffer);
3862 return(NULL);
3863 }
3864 NEXTL(l);
3865 c = CUR_CHAR(l);
3866 }
3867 buffer[len] = 0;
3868 if (ctxt->instate == XML_PARSER_EOF) {
3869 xmlFree(buffer);
3870 return(NULL);
3871 }
3872 return(buffer);
3873 }
3874 }
3875 if (ctxt->instate == XML_PARSER_EOF)
3876 return(NULL);
3877 if (len == 0)
3878 return(NULL);
3879 if (len > maxLength) {
3880 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3881 return(NULL);
3882 }
3883 return(xmlStrndup(buf, len));
3884 }
3885
3886 /**
3887 * xmlParseEntityValue:
3888 * @ctxt: an XML parser context
3889 * @orig: if non-NULL store a copy of the original entity value
3890 *
3891 * DEPRECATED: Internal function, don't use.
3892 *
3893 * parse a value for ENTITY declarations
3894 *
3895 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3896 * "'" ([^%&'] | PEReference | Reference)* "'"
3897 *
3898 * Returns the EntityValue parsed with reference substituted or NULL
3899 */
3900
3901 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3902 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3903 xmlChar *buf = NULL;
3904 int len = 0;
3905 int size = XML_PARSER_BUFFER_SIZE;
3906 int c, l;
3907 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3908 XML_MAX_HUGE_LENGTH :
3909 XML_MAX_TEXT_LENGTH;
3910 xmlChar stop;
3911 xmlChar *ret = NULL;
3912 const xmlChar *cur = NULL;
3913 xmlParserInputPtr input;
3914
3915 if (RAW == '"') stop = '"';
3916 else if (RAW == '\'') stop = '\'';
3917 else {
3918 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3919 return(NULL);
3920 }
3921 buf = (xmlChar *) xmlMallocAtomic(size);
3922 if (buf == NULL) {
3923 xmlErrMemory(ctxt, NULL);
3924 return(NULL);
3925 }
3926
3927 /*
3928 * The content of the entity definition is copied in a buffer.
3929 */
3930
3931 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3932 input = ctxt->input;
3933 GROW;
3934 if (ctxt->instate == XML_PARSER_EOF)
3935 goto error;
3936 NEXT;
3937 c = CUR_CHAR(l);
3938 /*
3939 * NOTE: 4.4.5 Included in Literal
3940 * When a parameter entity reference appears in a literal entity
3941 * value, ... a single or double quote character in the replacement
3942 * text is always treated as a normal data character and will not
3943 * terminate the literal.
3944 * In practice it means we stop the loop only when back at parsing
3945 * the initial entity and the quote is found
3946 */
3947 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3948 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3949 if (len + 5 >= size) {
3950 xmlChar *tmp;
3951
3952 size *= 2;
3953 tmp = (xmlChar *) xmlRealloc(buf, size);
3954 if (tmp == NULL) {
3955 xmlErrMemory(ctxt, NULL);
3956 goto error;
3957 }
3958 buf = tmp;
3959 }
3960 COPY_BUF(buf, len, c);
3961 NEXTL(l);
3962
3963 GROW;
3964 c = CUR_CHAR(l);
3965 if (c == 0) {
3966 GROW;
3967 c = CUR_CHAR(l);
3968 }
3969
3970 if (len > maxLength) {
3971 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3972 "entity value too long\n");
3973 goto error;
3974 }
3975 }
3976 buf[len] = 0;
3977 if (ctxt->instate == XML_PARSER_EOF)
3978 goto error;
3979 if (c != stop) {
3980 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3981 goto error;
3982 }
3983 NEXT;
3984
3985 /*
3986 * Raise problem w.r.t. '&' and '%' being used in non-entities
3987 * reference constructs. Note Charref will be handled in
3988 * xmlStringDecodeEntities()
3989 */
3990 cur = buf;
3991 while (*cur != 0) { /* non input consuming */
3992 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3993 xmlChar *name;
3994 xmlChar tmp = *cur;
3995 int nameOk = 0;
3996
3997 cur++;
3998 name = xmlParseStringName(ctxt, &cur);
3999 if (name != NULL) {
4000 nameOk = 1;
4001 xmlFree(name);
4002 }
4003 if ((nameOk == 0) || (*cur != ';')) {
4004 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
4005 "EntityValue: '%c' forbidden except for entities references\n",
4006 tmp);
4007 goto error;
4008 }
4009 if ((tmp == '%') && (ctxt->inSubset == 1) &&
4010 (ctxt->inputNr == 1)) {
4011 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
4012 goto error;
4013 }
4014 if (*cur == 0)
4015 break;
4016 }
4017 cur++;
4018 }
4019
4020 /*
4021 * Then PEReference entities are substituted.
4022 *
4023 * NOTE: 4.4.7 Bypassed
4024 * When a general entity reference appears in the EntityValue in
4025 * an entity declaration, it is bypassed and left as is.
4026 * so XML_SUBSTITUTE_REF is not set here.
4027 */
4028 ++ctxt->depth;
4029 ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
4030 0, 0, 0, /* check */ 1);
4031 --ctxt->depth;
4032
4033 if (orig != NULL) {
4034 *orig = buf;
4035 buf = NULL;
4036 }
4037
4038 error:
4039 if (buf != NULL)
4040 xmlFree(buf);
4041 return(ret);
4042 }
4043
4044 /**
4045 * xmlParseAttValueComplex:
4046 * @ctxt: an XML parser context
4047 * @len: the resulting attribute len
4048 * @normalize: whether to apply the inner normalization
4049 *
4050 * parse a value for an attribute, this is the fallback function
4051 * of xmlParseAttValue() when the attribute parsing requires handling
4052 * of non-ASCII characters, or normalization compaction.
4053 *
4054 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4055 */
4056 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)4057 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4058 xmlChar limit = 0;
4059 xmlChar *buf = NULL;
4060 xmlChar *rep = NULL;
4061 size_t len = 0;
4062 size_t buf_size = 0;
4063 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4064 XML_MAX_HUGE_LENGTH :
4065 XML_MAX_TEXT_LENGTH;
4066 int c, l, in_space = 0;
4067 xmlChar *current = NULL;
4068 xmlEntityPtr ent;
4069
4070 if (NXT(0) == '"') {
4071 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4072 limit = '"';
4073 NEXT;
4074 } else if (NXT(0) == '\'') {
4075 limit = '\'';
4076 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4077 NEXT;
4078 } else {
4079 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4080 return(NULL);
4081 }
4082
4083 /*
4084 * allocate a translation buffer.
4085 */
4086 buf_size = XML_PARSER_BUFFER_SIZE;
4087 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4088 if (buf == NULL) goto mem_error;
4089
4090 /*
4091 * OK loop until we reach one of the ending char or a size limit.
4092 */
4093 c = CUR_CHAR(l);
4094 while (((NXT(0) != limit) && /* checked */
4095 (IS_CHAR(c)) && (c != '<')) &&
4096 (ctxt->instate != XML_PARSER_EOF)) {
4097 if (c == '&') {
4098 in_space = 0;
4099 if (NXT(1) == '#') {
4100 int val = xmlParseCharRef(ctxt);
4101
4102 if (val == '&') {
4103 if (ctxt->replaceEntities) {
4104 if (len + 10 > buf_size) {
4105 growBuffer(buf, 10);
4106 }
4107 buf[len++] = '&';
4108 } else {
4109 /*
4110 * The reparsing will be done in xmlStringGetNodeList()
4111 * called by the attribute() function in SAX.c
4112 */
4113 if (len + 10 > buf_size) {
4114 growBuffer(buf, 10);
4115 }
4116 buf[len++] = '&';
4117 buf[len++] = '#';
4118 buf[len++] = '3';
4119 buf[len++] = '8';
4120 buf[len++] = ';';
4121 }
4122 } else if (val != 0) {
4123 if (len + 10 > buf_size) {
4124 growBuffer(buf, 10);
4125 }
4126 len += xmlCopyChar(0, &buf[len], val);
4127 }
4128 } else {
4129 ent = xmlParseEntityRef(ctxt);
4130 if ((ent != NULL) &&
4131 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4132 if (len + 10 > buf_size) {
4133 growBuffer(buf, 10);
4134 }
4135 if ((ctxt->replaceEntities == 0) &&
4136 (ent->content[0] == '&')) {
4137 buf[len++] = '&';
4138 buf[len++] = '#';
4139 buf[len++] = '3';
4140 buf[len++] = '8';
4141 buf[len++] = ';';
4142 } else {
4143 buf[len++] = ent->content[0];
4144 }
4145 } else if ((ent != NULL) &&
4146 (ctxt->replaceEntities != 0)) {
4147 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4148 if (xmlParserEntityCheck(ctxt, ent->length))
4149 goto error;
4150
4151 ++ctxt->depth;
4152 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4153 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4154 /* check */ 1);
4155 --ctxt->depth;
4156 if (rep != NULL) {
4157 current = rep;
4158 while (*current != 0) { /* non input consuming */
4159 if ((*current == 0xD) || (*current == 0xA) ||
4160 (*current == 0x9)) {
4161 buf[len++] = 0x20;
4162 current++;
4163 } else
4164 buf[len++] = *current++;
4165 if (len + 10 > buf_size) {
4166 growBuffer(buf, 10);
4167 }
4168 }
4169 xmlFree(rep);
4170 rep = NULL;
4171 }
4172 } else {
4173 if (len + 10 > buf_size) {
4174 growBuffer(buf, 10);
4175 }
4176 if (ent->content != NULL)
4177 buf[len++] = ent->content[0];
4178 }
4179 } else if (ent != NULL) {
4180 int i = xmlStrlen(ent->name);
4181 const xmlChar *cur = ent->name;
4182
4183 /*
4184 * We also check for recursion and amplification
4185 * when entities are not substituted. They're
4186 * often expanded later.
4187 */
4188 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4189 (ent->content != NULL)) {
4190 if ((ent->flags & XML_ENT_CHECKED) == 0) {
4191 unsigned long oldCopy = ctxt->sizeentcopy;
4192
4193 ctxt->sizeentcopy = ent->length;
4194
4195 ++ctxt->depth;
4196 rep = xmlStringDecodeEntitiesInt(ctxt,
4197 ent->content, ent->length,
4198 XML_SUBSTITUTE_REF, 0, 0, 0,
4199 /* check */ 1);
4200 --ctxt->depth;
4201
4202 /*
4203 * If we're parsing DTD content, the entity
4204 * might reference other entities which
4205 * weren't defined yet, so the check isn't
4206 * reliable.
4207 */
4208 if (ctxt->inSubset == 0) {
4209 ent->flags |= XML_ENT_CHECKED;
4210 ent->expandedSize = ctxt->sizeentcopy;
4211 }
4212
4213 if (rep != NULL) {
4214 xmlFree(rep);
4215 rep = NULL;
4216 } else {
4217 ent->content[0] = 0;
4218 }
4219
4220 if (xmlParserEntityCheck(ctxt, oldCopy))
4221 goto error;
4222 } else {
4223 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4224 goto error;
4225 }
4226 }
4227
4228 /*
4229 * Just output the reference
4230 */
4231 buf[len++] = '&';
4232 while (len + i + 10 > buf_size) {
4233 growBuffer(buf, i + 10);
4234 }
4235 for (;i > 0;i--)
4236 buf[len++] = *cur++;
4237 buf[len++] = ';';
4238 }
4239 }
4240 } else {
4241 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4242 if ((len != 0) || (!normalize)) {
4243 if ((!normalize) || (!in_space)) {
4244 COPY_BUF(buf, len, 0x20);
4245 while (len + 10 > buf_size) {
4246 growBuffer(buf, 10);
4247 }
4248 }
4249 in_space = 1;
4250 }
4251 } else {
4252 in_space = 0;
4253 COPY_BUF(buf, len, c);
4254 if (len + 10 > buf_size) {
4255 growBuffer(buf, 10);
4256 }
4257 }
4258 NEXTL(l);
4259 }
4260 GROW;
4261 c = CUR_CHAR(l);
4262 if (len > maxLength) {
4263 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4264 "AttValue length too long\n");
4265 goto mem_error;
4266 }
4267 }
4268 if (ctxt->instate == XML_PARSER_EOF)
4269 goto error;
4270
4271 if ((in_space) && (normalize)) {
4272 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4273 }
4274 buf[len] = 0;
4275 if (RAW == '<') {
4276 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4277 } else if (RAW != limit) {
4278 if ((c != 0) && (!IS_CHAR(c))) {
4279 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4280 "invalid character in attribute value\n");
4281 } else {
4282 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4283 "AttValue: ' expected\n");
4284 }
4285 } else
4286 NEXT;
4287
4288 if (attlen != NULL) *attlen = len;
4289 return(buf);
4290
4291 mem_error:
4292 xmlErrMemory(ctxt, NULL);
4293 error:
4294 if (buf != NULL)
4295 xmlFree(buf);
4296 if (rep != NULL)
4297 xmlFree(rep);
4298 return(NULL);
4299 }
4300
4301 /**
4302 * xmlParseAttValue:
4303 * @ctxt: an XML parser context
4304 *
4305 * DEPRECATED: Internal function, don't use.
4306 *
4307 * parse a value for an attribute
4308 * Note: the parser won't do substitution of entities here, this
4309 * will be handled later in xmlStringGetNodeList
4310 *
4311 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4312 * "'" ([^<&'] | Reference)* "'"
4313 *
4314 * 3.3.3 Attribute-Value Normalization:
4315 * Before the value of an attribute is passed to the application or
4316 * checked for validity, the XML processor must normalize it as follows:
4317 * - a character reference is processed by appending the referenced
4318 * character to the attribute value
4319 * - an entity reference is processed by recursively processing the
4320 * replacement text of the entity
4321 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4322 * appending #x20 to the normalized value, except that only a single
4323 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4324 * parsed entity or the literal entity value of an internal parsed entity
4325 * - other characters are processed by appending them to the normalized value
4326 * If the declared value is not CDATA, then the XML processor must further
4327 * process the normalized attribute value by discarding any leading and
4328 * trailing space (#x20) characters, and by replacing sequences of space
4329 * (#x20) characters by a single space (#x20) character.
4330 * All attributes for which no declaration has been read should be treated
4331 * by a non-validating parser as if declared CDATA.
4332 *
4333 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4334 */
4335
4336
4337 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4338 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4339 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4340 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4341 }
4342
4343 /**
4344 * xmlParseSystemLiteral:
4345 * @ctxt: an XML parser context
4346 *
4347 * DEPRECATED: Internal function, don't use.
4348 *
4349 * parse an XML Literal
4350 *
4351 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4352 *
4353 * Returns the SystemLiteral parsed or NULL
4354 */
4355
4356 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4357 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4358 xmlChar *buf = NULL;
4359 int len = 0;
4360 int size = XML_PARSER_BUFFER_SIZE;
4361 int cur, l;
4362 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4363 XML_MAX_TEXT_LENGTH :
4364 XML_MAX_NAME_LENGTH;
4365 xmlChar stop;
4366 int state = ctxt->instate;
4367
4368 if (RAW == '"') {
4369 NEXT;
4370 stop = '"';
4371 } else if (RAW == '\'') {
4372 NEXT;
4373 stop = '\'';
4374 } else {
4375 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4376 return(NULL);
4377 }
4378
4379 buf = (xmlChar *) xmlMallocAtomic(size);
4380 if (buf == NULL) {
4381 xmlErrMemory(ctxt, NULL);
4382 return(NULL);
4383 }
4384 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4385 cur = CUR_CHAR(l);
4386 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4387 if (len + 5 >= size) {
4388 xmlChar *tmp;
4389
4390 size *= 2;
4391 tmp = (xmlChar *) xmlRealloc(buf, size);
4392 if (tmp == NULL) {
4393 xmlFree(buf);
4394 xmlErrMemory(ctxt, NULL);
4395 ctxt->instate = (xmlParserInputState) state;
4396 return(NULL);
4397 }
4398 buf = tmp;
4399 }
4400 COPY_BUF(buf, len, cur);
4401 if (len > maxLength) {
4402 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4403 xmlFree(buf);
4404 ctxt->instate = (xmlParserInputState) state;
4405 return(NULL);
4406 }
4407 NEXTL(l);
4408 cur = CUR_CHAR(l);
4409 }
4410 buf[len] = 0;
4411 if (ctxt->instate == XML_PARSER_EOF) {
4412 xmlFree(buf);
4413 return(NULL);
4414 }
4415 ctxt->instate = (xmlParserInputState) state;
4416 if (!IS_CHAR(cur)) {
4417 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4418 } else {
4419 NEXT;
4420 }
4421 return(buf);
4422 }
4423
4424 /**
4425 * xmlParsePubidLiteral:
4426 * @ctxt: an XML parser context
4427 *
4428 * DEPRECATED: Internal function, don't use.
4429 *
4430 * parse an XML public literal
4431 *
4432 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4433 *
4434 * Returns the PubidLiteral parsed or NULL.
4435 */
4436
4437 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4438 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4439 xmlChar *buf = NULL;
4440 int len = 0;
4441 int size = XML_PARSER_BUFFER_SIZE;
4442 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4443 XML_MAX_TEXT_LENGTH :
4444 XML_MAX_NAME_LENGTH;
4445 xmlChar cur;
4446 xmlChar stop;
4447 xmlParserInputState oldstate = ctxt->instate;
4448
4449 if (RAW == '"') {
4450 NEXT;
4451 stop = '"';
4452 } else if (RAW == '\'') {
4453 NEXT;
4454 stop = '\'';
4455 } else {
4456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4457 return(NULL);
4458 }
4459 buf = (xmlChar *) xmlMallocAtomic(size);
4460 if (buf == NULL) {
4461 xmlErrMemory(ctxt, NULL);
4462 return(NULL);
4463 }
4464 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4465 cur = CUR;
4466 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4467 if (len + 1 >= size) {
4468 xmlChar *tmp;
4469
4470 size *= 2;
4471 tmp = (xmlChar *) xmlRealloc(buf, size);
4472 if (tmp == NULL) {
4473 xmlErrMemory(ctxt, NULL);
4474 xmlFree(buf);
4475 return(NULL);
4476 }
4477 buf = tmp;
4478 }
4479 buf[len++] = cur;
4480 if (len > maxLength) {
4481 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4482 xmlFree(buf);
4483 return(NULL);
4484 }
4485 NEXT;
4486 cur = CUR;
4487 }
4488 buf[len] = 0;
4489 if (ctxt->instate == XML_PARSER_EOF) {
4490 xmlFree(buf);
4491 return(NULL);
4492 }
4493 if (cur != stop) {
4494 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4495 } else {
4496 NEXTL(1);
4497 }
4498 ctxt->instate = oldstate;
4499 return(buf);
4500 }
4501
/*
 * Forward declaration: xmlParseCharDataInternal() hands over to this
 * routine, which is defined after it in this file.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4503
/*
 * Lookup table driving the inner loop of the character data fast path.
 *
 * A non-zero entry means the byte is skipped over by that loop; a zero
 * entry makes the loop stop. The zero entries are:
 *   - every byte below 0x20 except TAB (0x09), so LF and CR stop the loop,
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every byte from 0x80 upwards (non-ASCII).
 * Each non-zero entry holds its own index. One row covers 16 bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x09, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0 /* & */, 0x27,
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0 /* < */, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0 /* ] */, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4541
/**
 * xmlParseCharDataInternal:
 * @ctxt:  an XML parser context
 * @partial:  buffer may contain partial UTF-8 sequences
 *
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * This is the fast path: it scans the input buffer in place and passes
 * pointers into that buffer to the SAX callbacks, without copying. As
 * soon as a byte shows up that the fast path does not handle, the work
 * is handed to xmlParseCharDataComplex(), to which @partial is forwarded
 * unchanged.
 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot. It is refreshed each time a chunk is delivered
     * to SAX and written back before falling through to the slow path.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* Skip a leading run made only of spaces and line feeds. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * The run consisted only of 0x20 / 0xA bytes and is directly
             * followed by markup: deliver it and stop.
             */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                /* Advance the input before invoking any callback. */
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    /*
                     * NOTE(review): the trailing 1 presumably tells
                     * areBlanks() the run is already known to be all
                     * blanks -- confirm against areBlanks().
                     */
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        /*
                         * NOTE(review): -1 / -2 in *ctxt->space look like
                         * "undetermined" / "not blank" markers -- confirm.
                         */
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    /* Both callbacks are the same function: no sorting. */
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /* Skip every byte that test_char_data marks as non-zero. */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            /* "]]>" is not allowed in character data. */
            if ((in[1] == ']') && (in[2] == '>')) {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                /* Step over the first ']' so that progress is made. */
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->input->cur = in + 1;
                return;
            }
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Deliver what has been scanned since ctxt->input->cur. */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                /*
                 * The chunk starts with a blank, so it may be ignorable
                 * whitespace: let areBlanks() decide.
                 */
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
            /* A callback may have stopped the parser. */
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: restart the pending chunk at the LF, which
                 * leaves the CR out of the delivered data.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* Lone CR: step back; the loop condition below rejects it. */
            in--;
        }
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF)
            return;
        /* SHRINK/GROW may have changed the buffer: reload the cursor. */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /*
     * Slow path: put the position back to the last snapshot before
     * handing the remaining data to the generic routine.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4699
4700 /**
4701 * xmlParseCharDataComplex:
4702 * @ctxt: an XML parser context
4703 * @cdata: int indicating whether we are within a CDATA section
4704 *
4705 * Always makes progress if the first char isn't '<' or '&'.
4706 *
4707 * parse a CharData section.this is the fallback function
4708 * of xmlParseCharData() when the parsing requires handling
4709 * of non-ASCII characters.
4710 */
4711 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4712 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4713 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4714 int nbchar = 0;
4715 int cur, l;
4716
4717 cur = CUR_CHAR(l);
4718 while ((cur != '<') && /* checked */
4719 (cur != '&') &&
4720 (IS_CHAR(cur))) {
4721 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4722 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4723 }
4724 COPY_BUF(buf, nbchar, cur);
4725 /* move current position before possible calling of ctxt->sax->characters */
4726 NEXTL(l);
4727 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4728 buf[nbchar] = 0;
4729
4730 /*
4731 * OK the segment is to be consumed as chars.
4732 */
4733 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4734 if (areBlanks(ctxt, buf, nbchar, 0)) {
4735 if (ctxt->sax->ignorableWhitespace != NULL)
4736 ctxt->sax->ignorableWhitespace(ctxt->userData,
4737 buf, nbchar);
4738 } else {
4739 if (ctxt->sax->characters != NULL)
4740 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4741 if ((ctxt->sax->characters !=
4742 ctxt->sax->ignorableWhitespace) &&
4743 (*ctxt->space == -1))
4744 *ctxt->space = -2;
4745 }
4746 }
4747 nbchar = 0;
4748 /* something really bad happened in the SAX callback */
4749 if (ctxt->instate != XML_PARSER_CONTENT)
4750 return;
4751 SHRINK;
4752 }
4753 cur = CUR_CHAR(l);
4754 }
4755 if (ctxt->instate == XML_PARSER_EOF)
4756 return;
4757 if (nbchar != 0) {
4758 buf[nbchar] = 0;
4759 /*
4760 * OK the segment is to be consumed as chars.
4761 */
4762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4763 if (areBlanks(ctxt, buf, nbchar, 0)) {
4764 if (ctxt->sax->ignorableWhitespace != NULL)
4765 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4766 } else {
4767 if (ctxt->sax->characters != NULL)
4768 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4769 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4770 (*ctxt->space == -1))
4771 *ctxt->space = -2;
4772 }
4773 }
4774 }
4775 /*
4776 * cur == 0 can mean
4777 *
4778 * - XML_PARSER_EOF or memory error. This is checked above.
4779 * - An actual 0 character.
4780 * - End of buffer.
4781 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4782 */
4783 if (ctxt->input->cur < ctxt->input->end) {
4784 if ((cur == 0) && (CUR != 0)) {
4785 if (partial == 0) {
4786 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4787 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4788 NEXTL(1);
4789 }
4790 } else if ((cur != '<') && (cur != '&')) {
4791 /* Generate the error and skip the offending character */
4792 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4793 "PCDATA invalid Char value %d\n", cur);
4794 NEXTL(l);
4795 }
4796 }
4797 }
4798
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  unused
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Thin wrapper that forwards to xmlParseCharDataInternal() with its
 * partial argument set to 0, i.e. an incomplete UTF-8 sequence is
 * reported as an error rather than tolerated.
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
    xmlParseCharDataInternal(ctxt, 0);
}
4810
4811 /**
4812 * xmlParseExternalID:
4813 * @ctxt: an XML parser context
4814 * @publicID: a xmlChar** receiving PubidLiteral
4815 * @strict: indicate whether we should restrict parsing to only
4816 * production [75], see NOTE below
4817 *
4818 * DEPRECATED: Internal function, don't use.
4819 *
4820 * Parse an External ID or a Public ID
4821 *
4822 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4823 * 'PUBLIC' S PubidLiteral S SystemLiteral
4824 *
4825 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4826 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4827 *
4828 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4829 *
4830 * Returns the function returns SystemLiteral and in the second
4831 * case publicID receives PubidLiteral, is strict is off
4832 * it is possible to return NULL and have publicID set.
4833 */
4834
4835 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4836 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4837 xmlChar *URI = NULL;
4838
4839 *publicID = NULL;
4840 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4841 SKIP(6);
4842 if (SKIP_BLANKS == 0) {
4843 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4844 "Space required after 'SYSTEM'\n");
4845 }
4846 URI = xmlParseSystemLiteral(ctxt);
4847 if (URI == NULL) {
4848 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4849 }
4850 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4851 SKIP(6);
4852 if (SKIP_BLANKS == 0) {
4853 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4854 "Space required after 'PUBLIC'\n");
4855 }
4856 *publicID = xmlParsePubidLiteral(ctxt);
4857 if (*publicID == NULL) {
4858 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4859 }
4860 if (strict) {
4861 /*
4862 * We don't handle [83] so "S SystemLiteral" is required.
4863 */
4864 if (SKIP_BLANKS == 0) {
4865 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4866 "Space required after the Public Identifier\n");
4867 }
4868 } else {
4869 /*
4870 * We handle [83] so we return immediately, if
4871 * "S SystemLiteral" is not detected. We skip blanks if no
4872 * system literal was found, but this is harmless since we must
4873 * be at the end of a NotationDecl.
4874 */
4875 if (SKIP_BLANKS == 0) return(NULL);
4876 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4877 }
4878 URI = xmlParseSystemLiteral(ctxt);
4879 if (URI == NULL) {
4880 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4881 }
4882 }
4883 return(URI);
4884 }
4885
4886 /**
4887 * xmlParseCommentComplex:
4888 * @ctxt: an XML parser context
4889 * @buf: the already parsed part of the buffer
4890 * @len: number of bytes in the buffer
4891 * @size: allocated size of the buffer
4892 *
4893 * Skip an XML (SGML) comment <!-- .... -->
4894 * The spec says that "For compatibility, the string "--" (double-hyphen)
4895 * must not occur within comments. "
4896 * This is the slow routine in case the accelerator for ascii didn't work
4897 *
4898 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4899 */
4900 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)4901 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4902 size_t len, size_t size) {
4903 int q, ql;
4904 int r, rl;
4905 int cur, l;
4906 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4907 XML_MAX_HUGE_LENGTH :
4908 XML_MAX_TEXT_LENGTH;
4909 int inputid;
4910
4911 inputid = ctxt->input->id;
4912
4913 if (buf == NULL) {
4914 len = 0;
4915 size = XML_PARSER_BUFFER_SIZE;
4916 buf = (xmlChar *) xmlMallocAtomic(size);
4917 if (buf == NULL) {
4918 xmlErrMemory(ctxt, NULL);
4919 return;
4920 }
4921 }
4922 q = CUR_CHAR(ql);
4923 if (q == 0)
4924 goto not_terminated;
4925 if (!IS_CHAR(q)) {
4926 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927 "xmlParseComment: invalid xmlChar value %d\n",
4928 q);
4929 xmlFree (buf);
4930 return;
4931 }
4932 NEXTL(ql);
4933 r = CUR_CHAR(rl);
4934 if (r == 0)
4935 goto not_terminated;
4936 if (!IS_CHAR(r)) {
4937 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4938 "xmlParseComment: invalid xmlChar value %d\n",
4939 r);
4940 xmlFree (buf);
4941 return;
4942 }
4943 NEXTL(rl);
4944 cur = CUR_CHAR(l);
4945 if (cur == 0)
4946 goto not_terminated;
4947 while (IS_CHAR(cur) && /* checked */
4948 ((cur != '>') ||
4949 (r != '-') || (q != '-'))) {
4950 if ((r == '-') && (q == '-')) {
4951 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4952 }
4953 if (len + 5 >= size) {
4954 xmlChar *new_buf;
4955 size_t new_size;
4956
4957 new_size = size * 2;
4958 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4959 if (new_buf == NULL) {
4960 xmlFree (buf);
4961 xmlErrMemory(ctxt, NULL);
4962 return;
4963 }
4964 buf = new_buf;
4965 size = new_size;
4966 }
4967 COPY_BUF(buf, len, q);
4968 if (len > maxLength) {
4969 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4970 "Comment too big found", NULL);
4971 xmlFree (buf);
4972 return;
4973 }
4974
4975 q = r;
4976 ql = rl;
4977 r = cur;
4978 rl = l;
4979
4980 NEXTL(l);
4981 cur = CUR_CHAR(l);
4982
4983 }
4984 buf[len] = 0;
4985 if (ctxt->instate == XML_PARSER_EOF) {
4986 xmlFree(buf);
4987 return;
4988 }
4989 if (cur == 0) {
4990 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4991 "Comment not terminated \n<!--%.50s\n", buf);
4992 } else if (!IS_CHAR(cur)) {
4993 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4994 "xmlParseComment: invalid xmlChar value %d\n",
4995 cur);
4996 } else {
4997 if (inputid != ctxt->input->id) {
4998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999 "Comment doesn't start and stop in the same"
5000 " entity\n");
5001 }
5002 NEXT;
5003 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5004 (!ctxt->disableSAX))
5005 ctxt->sax->comment(ctxt->userData, buf);
5006 }
5007 xmlFree(buf);
5008 return;
5009 not_terminated:
5010 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5011 "Comment not terminated\n", NULL);
5012 xmlFree(buf);
5013 return;
5014 }
5015
5016 /**
5017 * xmlParseComment:
5018 * @ctxt: an XML parser context
5019 *
5020 * DEPRECATED: Internal function, don't use.
5021 *
5022 * Parse an XML (SGML) comment. Always consumes '<!'.
5023 *
5024 * The spec says that "For compatibility, the string "--" (double-hyphen)
5025 * must not occur within comments. "
5026 *
5027 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5028 */
5029 void
xmlParseComment(xmlParserCtxtPtr ctxt)5030 xmlParseComment(xmlParserCtxtPtr ctxt) {
5031 xmlChar *buf = NULL;
5032 size_t size = XML_PARSER_BUFFER_SIZE;
5033 size_t len = 0;
5034 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5035 XML_MAX_HUGE_LENGTH :
5036 XML_MAX_TEXT_LENGTH;
5037 xmlParserInputState state;
5038 const xmlChar *in;
5039 size_t nbchar = 0;
5040 int ccol;
5041 int inputid;
5042
5043 /*
5044 * Check that there is a comment right here.
5045 */
5046 if ((RAW != '<') || (NXT(1) != '!'))
5047 return;
5048 SKIP(2);
5049 if ((RAW != '-') || (NXT(1) != '-'))
5050 return;
5051 state = ctxt->instate;
5052 ctxt->instate = XML_PARSER_COMMENT;
5053 inputid = ctxt->input->id;
5054 SKIP(2);
5055 GROW;
5056
5057 /*
5058 * Accelerated common case where input don't need to be
5059 * modified before passing it to the handler.
5060 */
5061 in = ctxt->input->cur;
5062 do {
5063 if (*in == 0xA) {
5064 do {
5065 ctxt->input->line++; ctxt->input->col = 1;
5066 in++;
5067 } while (*in == 0xA);
5068 }
5069 get_more:
5070 ccol = ctxt->input->col;
5071 while (((*in > '-') && (*in <= 0x7F)) ||
5072 ((*in >= 0x20) && (*in < '-')) ||
5073 (*in == 0x09)) {
5074 in++;
5075 ccol++;
5076 }
5077 ctxt->input->col = ccol;
5078 if (*in == 0xA) {
5079 do {
5080 ctxt->input->line++; ctxt->input->col = 1;
5081 in++;
5082 } while (*in == 0xA);
5083 goto get_more;
5084 }
5085 nbchar = in - ctxt->input->cur;
5086 /*
5087 * save current set of data
5088 */
5089 if (nbchar > 0) {
5090 if (buf == NULL) {
5091 if ((*in == '-') && (in[1] == '-'))
5092 size = nbchar + 1;
5093 else
5094 size = XML_PARSER_BUFFER_SIZE + nbchar;
5095 buf = (xmlChar *) xmlMallocAtomic(size);
5096 if (buf == NULL) {
5097 xmlErrMemory(ctxt, NULL);
5098 ctxt->instate = state;
5099 return;
5100 }
5101 len = 0;
5102 } else if (len + nbchar + 1 >= size) {
5103 xmlChar *new_buf;
5104 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5105 new_buf = (xmlChar *) xmlRealloc(buf, size);
5106 if (new_buf == NULL) {
5107 xmlFree (buf);
5108 xmlErrMemory(ctxt, NULL);
5109 ctxt->instate = state;
5110 return;
5111 }
5112 buf = new_buf;
5113 }
5114 memcpy(&buf[len], ctxt->input->cur, nbchar);
5115 len += nbchar;
5116 buf[len] = 0;
5117 }
5118 if (len > maxLength) {
5119 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5120 "Comment too big found", NULL);
5121 xmlFree (buf);
5122 return;
5123 }
5124 ctxt->input->cur = in;
5125 if (*in == 0xA) {
5126 in++;
5127 ctxt->input->line++; ctxt->input->col = 1;
5128 }
5129 if (*in == 0xD) {
5130 in++;
5131 if (*in == 0xA) {
5132 ctxt->input->cur = in;
5133 in++;
5134 ctxt->input->line++; ctxt->input->col = 1;
5135 goto get_more;
5136 }
5137 in--;
5138 }
5139 SHRINK;
5140 GROW;
5141 if (ctxt->instate == XML_PARSER_EOF) {
5142 xmlFree(buf);
5143 return;
5144 }
5145 in = ctxt->input->cur;
5146 if (*in == '-') {
5147 if (in[1] == '-') {
5148 if (in[2] == '>') {
5149 if (ctxt->input->id != inputid) {
5150 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5151 "comment doesn't start and stop in the"
5152 " same entity\n");
5153 }
5154 SKIP(3);
5155 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5156 (!ctxt->disableSAX)) {
5157 if (buf != NULL)
5158 ctxt->sax->comment(ctxt->userData, buf);
5159 else
5160 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5161 }
5162 if (buf != NULL)
5163 xmlFree(buf);
5164 if (ctxt->instate != XML_PARSER_EOF)
5165 ctxt->instate = state;
5166 return;
5167 }
5168 if (buf != NULL) {
5169 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5170 "Double hyphen within comment: "
5171 "<!--%.50s\n",
5172 buf);
5173 } else
5174 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5175 "Double hyphen within comment\n", NULL);
5176 if (ctxt->instate == XML_PARSER_EOF) {
5177 xmlFree(buf);
5178 return;
5179 }
5180 in++;
5181 ctxt->input->col++;
5182 }
5183 in++;
5184 ctxt->input->col++;
5185 goto get_more;
5186 }
5187 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5188 xmlParseCommentComplex(ctxt, buf, len, size);
5189 ctxt->instate = state;
5190 return;
5191 }
5192
5193
5194 /**
5195 * xmlParsePITarget:
5196 * @ctxt: an XML parser context
5197 *
5198 * DEPRECATED: Internal function, don't use.
5199 *
5200 * parse the name of a PI
5201 *
5202 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5203 *
5204 * Returns the PITarget name or NULL
5205 */
5206
5207 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5208 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5209 const xmlChar *name;
5210
5211 name = xmlParseName(ctxt);
5212 if ((name != NULL) &&
5213 ((name[0] == 'x') || (name[0] == 'X')) &&
5214 ((name[1] == 'm') || (name[1] == 'M')) &&
5215 ((name[2] == 'l') || (name[2] == 'L'))) {
5216 int i;
5217 if ((name[0] == 'x') && (name[1] == 'm') &&
5218 (name[2] == 'l') && (name[3] == 0)) {
5219 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5220 "XML declaration allowed only at the start of the document\n");
5221 return(name);
5222 } else if (name[3] == 0) {
5223 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5224 return(name);
5225 }
5226 for (i = 0;;i++) {
5227 if (xmlW3CPIs[i] == NULL) break;
5228 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5229 return(name);
5230 }
5231 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5232 "xmlParsePITarget: invalid name prefix 'xml'\n",
5233 NULL, NULL);
5234 }
5235 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5236 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5237 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5238 }
5239 return(name);
5240 }
5241
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The accepted form is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks. Anything
 * else produces an XML_WAR_CATALOG_PI warning, except a missing '=',
 * which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    /*
     * NOTE(review): this is the only malformed case that does not emit
     * the warning -- confirm whether that is intentional.
     */
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;
    p++;

    /* The URL runs up to the matching quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5303
5304 /**
5305 * xmlParsePI:
5306 * @ctxt: an XML parser context
5307 *
5308 * DEPRECATED: Internal function, don't use.
5309 *
5310 * parse an XML Processing Instruction.
5311 *
5312 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5313 *
5314 * The processing is transferred to SAX once parsed.
5315 */
5316
5317 void
xmlParsePI(xmlParserCtxtPtr ctxt)5318 xmlParsePI(xmlParserCtxtPtr ctxt) {
5319 xmlChar *buf = NULL;
5320 size_t len = 0;
5321 size_t size = XML_PARSER_BUFFER_SIZE;
5322 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5323 XML_MAX_HUGE_LENGTH :
5324 XML_MAX_TEXT_LENGTH;
5325 int cur, l;
5326 const xmlChar *target;
5327 xmlParserInputState state;
5328
5329 if ((RAW == '<') && (NXT(1) == '?')) {
5330 int inputid = ctxt->input->id;
5331 state = ctxt->instate;
5332 ctxt->instate = XML_PARSER_PI;
5333 /*
5334 * this is a Processing Instruction.
5335 */
5336 SKIP(2);
5337
5338 /*
5339 * Parse the target name and check for special support like
5340 * namespace.
5341 */
5342 target = xmlParsePITarget(ctxt);
5343 if (target != NULL) {
5344 if ((RAW == '?') && (NXT(1) == '>')) {
5345 if (inputid != ctxt->input->id) {
5346 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5347 "PI declaration doesn't start and stop in"
5348 " the same entity\n");
5349 }
5350 SKIP(2);
5351
5352 /*
5353 * SAX: PI detected.
5354 */
5355 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5356 (ctxt->sax->processingInstruction != NULL))
5357 ctxt->sax->processingInstruction(ctxt->userData,
5358 target, NULL);
5359 if (ctxt->instate != XML_PARSER_EOF)
5360 ctxt->instate = state;
5361 return;
5362 }
5363 buf = (xmlChar *) xmlMallocAtomic(size);
5364 if (buf == NULL) {
5365 xmlErrMemory(ctxt, NULL);
5366 ctxt->instate = state;
5367 return;
5368 }
5369 if (SKIP_BLANKS == 0) {
5370 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5371 "ParsePI: PI %s space expected\n", target);
5372 }
5373 cur = CUR_CHAR(l);
5374 while (IS_CHAR(cur) && /* checked */
5375 ((cur != '?') || (NXT(1) != '>'))) {
5376 if (len + 5 >= size) {
5377 xmlChar *tmp;
5378 size_t new_size = size * 2;
5379 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5380 if (tmp == NULL) {
5381 xmlErrMemory(ctxt, NULL);
5382 xmlFree(buf);
5383 ctxt->instate = state;
5384 return;
5385 }
5386 buf = tmp;
5387 size = new_size;
5388 }
5389 COPY_BUF(buf, len, cur);
5390 if (len > maxLength) {
5391 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5392 "PI %s too big found", target);
5393 xmlFree(buf);
5394 ctxt->instate = state;
5395 return;
5396 }
5397 NEXTL(l);
5398 cur = CUR_CHAR(l);
5399 }
5400 buf[len] = 0;
5401 if (ctxt->instate == XML_PARSER_EOF) {
5402 xmlFree(buf);
5403 return;
5404 }
5405 if (cur != '?') {
5406 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5407 "ParsePI: PI %s never end ...\n", target);
5408 } else {
5409 if (inputid != ctxt->input->id) {
5410 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5411 "PI declaration doesn't start and stop in"
5412 " the same entity\n");
5413 }
5414 SKIP(2);
5415
5416 #ifdef LIBXML_CATALOG_ENABLED
5417 if (((state == XML_PARSER_MISC) ||
5418 (state == XML_PARSER_START)) &&
5419 (xmlStrEqual(target, XML_CATALOG_PI))) {
5420 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5421 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5422 (allow == XML_CATA_ALLOW_ALL))
5423 xmlParseCatalogPI(ctxt, buf);
5424 }
5425 #endif
5426
5427
5428 /*
5429 * SAX: PI detected.
5430 */
5431 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5432 (ctxt->sax->processingInstruction != NULL))
5433 ctxt->sax->processingInstruction(ctxt->userData,
5434 target, buf);
5435 }
5436 xmlFree(buf);
5437 } else {
5438 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5439 }
5440 if (ctxt->instate != XML_PARSER_EOF)
5441 ctxt->instate = state;
5442 }
5443 }
5444
5445 /**
5446 * xmlParseNotationDecl:
5447 * @ctxt: an XML parser context
5448 *
5449 * DEPRECATED: Internal function, don't use.
5450 *
5451 * Parse a notation declaration. Always consumes '<!'.
5452 *
5453 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5454 *
5455 * Hence there is actually 3 choices:
5456 * 'PUBLIC' S PubidLiteral
5457 * 'PUBLIC' S PubidLiteral S SystemLiteral
5458 * and 'SYSTEM' S SystemLiteral
5459 *
5460 * See the NOTE on xmlParseExternalID().
5461 */
5462
5463 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5464 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5465 const xmlChar *name;
5466 xmlChar *Pubid;
5467 xmlChar *Systemid;
5468
5469 if ((CUR != '<') || (NXT(1) != '!'))
5470 return;
5471 SKIP(2);
5472
5473 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5474 int inputid = ctxt->input->id;
5475 SKIP(8);
5476 if (SKIP_BLANKS == 0) {
5477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 "Space required after '<!NOTATION'\n");
5479 return;
5480 }
5481
5482 name = xmlParseName(ctxt);
5483 if (name == NULL) {
5484 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5485 return;
5486 }
5487 if (xmlStrchr(name, ':') != NULL) {
5488 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5489 "colons are forbidden from notation names '%s'\n",
5490 name, NULL, NULL);
5491 }
5492 if (SKIP_BLANKS == 0) {
5493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after the NOTATION name'\n");
5495 return;
5496 }
5497
5498 /*
5499 * Parse the IDs.
5500 */
5501 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5502 SKIP_BLANKS;
5503
5504 if (RAW == '>') {
5505 if (inputid != ctxt->input->id) {
5506 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5507 "Notation declaration doesn't start and stop"
5508 " in the same entity\n");
5509 }
5510 NEXT;
5511 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5512 (ctxt->sax->notationDecl != NULL))
5513 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5514 } else {
5515 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5516 }
5517 if (Systemid != NULL) xmlFree(Systemid);
5518 if (Pubid != NULL) xmlFree(Pubid);
5519 }
5520 }
5521
5522 /**
5523 * xmlParseEntityDecl:
5524 * @ctxt: an XML parser context
5525 *
5526 * DEPRECATED: Internal function, don't use.
5527 *
5528 * Parse an entity declaration. Always consumes '<!'.
5529 *
5530 * [70] EntityDecl ::= GEDecl | PEDecl
5531 *
5532 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5533 *
5534 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5535 *
5536 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5537 *
5538 * [74] PEDef ::= EntityValue | ExternalID
5539 *
5540 * [76] NDataDecl ::= S 'NDATA' S Name
5541 *
5542 * [ VC: Notation Declared ]
5543 * The Name must match the declared name of a notation.
5544 */
5545
5546 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5547 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5548 const xmlChar *name = NULL;
5549 xmlChar *value = NULL;
5550 xmlChar *URI = NULL, *literal = NULL;
5551 const xmlChar *ndata = NULL;
5552 int isParameter = 0;
5553 xmlChar *orig = NULL;
5554
5555 if ((CUR != '<') || (NXT(1) != '!'))
5556 return;
5557 SKIP(2);
5558
5559 /* GROW; done in the caller */
5560 if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5561 int inputid = ctxt->input->id;
5562 SKIP(6);
5563 if (SKIP_BLANKS == 0) {
5564 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5565 "Space required after '<!ENTITY'\n");
5566 }
5567
5568 if (RAW == '%') {
5569 NEXT;
5570 if (SKIP_BLANKS == 0) {
5571 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5572 "Space required after '%%'\n");
5573 }
5574 isParameter = 1;
5575 }
5576
5577 name = xmlParseName(ctxt);
5578 if (name == NULL) {
5579 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5580 "xmlParseEntityDecl: no name\n");
5581 return;
5582 }
5583 if (xmlStrchr(name, ':') != NULL) {
5584 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5585 "colons are forbidden from entities names '%s'\n",
5586 name, NULL, NULL);
5587 }
5588 if (SKIP_BLANKS == 0) {
5589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5590 "Space required after the entity name\n");
5591 }
5592
5593 ctxt->instate = XML_PARSER_ENTITY_DECL;
5594 /*
5595 * handle the various case of definitions...
5596 */
5597 if (isParameter) {
5598 if ((RAW == '"') || (RAW == '\'')) {
5599 value = xmlParseEntityValue(ctxt, &orig);
5600 if (value) {
5601 if ((ctxt->sax != NULL) &&
5602 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5603 ctxt->sax->entityDecl(ctxt->userData, name,
5604 XML_INTERNAL_PARAMETER_ENTITY,
5605 NULL, NULL, value);
5606 }
5607 } else {
5608 URI = xmlParseExternalID(ctxt, &literal, 1);
5609 if ((URI == NULL) && (literal == NULL)) {
5610 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5611 }
5612 if (URI) {
5613 xmlURIPtr uri;
5614
5615 uri = xmlParseURI((const char *) URI);
5616 if (uri == NULL) {
5617 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5618 "Invalid URI: %s\n", URI);
5619 /*
5620 * This really ought to be a well formedness error
5621 * but the XML Core WG decided otherwise c.f. issue
5622 * E26 of the XML erratas.
5623 */
5624 } else {
5625 if (uri->fragment != NULL) {
5626 /*
5627 * Okay this is foolish to block those but not
5628 * invalid URIs.
5629 */
5630 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5631 } else {
5632 if ((ctxt->sax != NULL) &&
5633 (!ctxt->disableSAX) &&
5634 (ctxt->sax->entityDecl != NULL))
5635 ctxt->sax->entityDecl(ctxt->userData, name,
5636 XML_EXTERNAL_PARAMETER_ENTITY,
5637 literal, URI, NULL);
5638 }
5639 xmlFreeURI(uri);
5640 }
5641 }
5642 }
5643 } else {
5644 if ((RAW == '"') || (RAW == '\'')) {
5645 value = xmlParseEntityValue(ctxt, &orig);
5646 if ((ctxt->sax != NULL) &&
5647 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5648 ctxt->sax->entityDecl(ctxt->userData, name,
5649 XML_INTERNAL_GENERAL_ENTITY,
5650 NULL, NULL, value);
5651 /*
5652 * For expat compatibility in SAX mode.
5653 */
5654 if ((ctxt->myDoc == NULL) ||
5655 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5656 if (ctxt->myDoc == NULL) {
5657 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5658 if (ctxt->myDoc == NULL) {
5659 xmlErrMemory(ctxt, "New Doc failed");
5660 goto done;
5661 }
5662 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5663 }
5664 if (ctxt->myDoc->intSubset == NULL)
5665 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5666 BAD_CAST "fake", NULL, NULL);
5667
5668 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5669 NULL, NULL, value);
5670 }
5671 } else {
5672 URI = xmlParseExternalID(ctxt, &literal, 1);
5673 if ((URI == NULL) && (literal == NULL)) {
5674 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5675 }
5676 if (URI) {
5677 xmlURIPtr uri;
5678
5679 uri = xmlParseURI((const char *)URI);
5680 if (uri == NULL) {
5681 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5682 "Invalid URI: %s\n", URI);
5683 /*
5684 * This really ought to be a well formedness error
5685 * but the XML Core WG decided otherwise c.f. issue
5686 * E26 of the XML erratas.
5687 */
5688 } else {
5689 if (uri->fragment != NULL) {
5690 /*
5691 * Okay this is foolish to block those but not
5692 * invalid URIs.
5693 */
5694 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5695 }
5696 xmlFreeURI(uri);
5697 }
5698 }
5699 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required before 'NDATA'\n");
5702 }
5703 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5704 SKIP(5);
5705 if (SKIP_BLANKS == 0) {
5706 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707 "Space required after 'NDATA'\n");
5708 }
5709 ndata = xmlParseName(ctxt);
5710 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5711 (ctxt->sax->unparsedEntityDecl != NULL))
5712 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5713 literal, URI, ndata);
5714 } else {
5715 if ((ctxt->sax != NULL) &&
5716 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5717 ctxt->sax->entityDecl(ctxt->userData, name,
5718 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5719 literal, URI, NULL);
5720 /*
5721 * For expat compatibility in SAX mode.
5722 * assuming the entity replacement was asked for
5723 */
5724 if ((ctxt->replaceEntities != 0) &&
5725 ((ctxt->myDoc == NULL) ||
5726 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5727 if (ctxt->myDoc == NULL) {
5728 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5729 if (ctxt->myDoc == NULL) {
5730 xmlErrMemory(ctxt, "New Doc failed");
5731 goto done;
5732 }
5733 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5734 }
5735
5736 if (ctxt->myDoc->intSubset == NULL)
5737 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5738 BAD_CAST "fake", NULL, NULL);
5739 xmlSAX2EntityDecl(ctxt, name,
5740 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5741 literal, URI, NULL);
5742 }
5743 }
5744 }
5745 }
5746 if (ctxt->instate == XML_PARSER_EOF)
5747 goto done;
5748 SKIP_BLANKS;
5749 if (RAW != '>') {
5750 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5751 "xmlParseEntityDecl: entity %s not terminated\n", name);
5752 xmlHaltParser(ctxt);
5753 } else {
5754 if (inputid != ctxt->input->id) {
5755 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5756 "Entity declaration doesn't start and stop in"
5757 " the same entity\n");
5758 }
5759 NEXT;
5760 }
5761 if (orig != NULL) {
5762 /*
5763 * Ugly mechanism to save the raw entity value.
5764 */
5765 xmlEntityPtr cur = NULL;
5766
5767 if (isParameter) {
5768 if ((ctxt->sax != NULL) &&
5769 (ctxt->sax->getParameterEntity != NULL))
5770 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5771 } else {
5772 if ((ctxt->sax != NULL) &&
5773 (ctxt->sax->getEntity != NULL))
5774 cur = ctxt->sax->getEntity(ctxt->userData, name);
5775 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5776 cur = xmlSAX2GetEntity(ctxt, name);
5777 }
5778 }
5779 if ((cur != NULL) && (cur->orig == NULL)) {
5780 cur->orig = orig;
5781 orig = NULL;
5782 }
5783 }
5784
5785 done:
5786 if (value != NULL) xmlFree(value);
5787 if (URI != NULL) xmlFree(URI);
5788 if (literal != NULL) xmlFree(literal);
5789 if (orig != NULL) xmlFree(orig);
5790 }
5791 }
5792
5793 /**
5794 * xmlParseDefaultDecl:
5795 * @ctxt: an XML parser context
5796 * @value: Receive a possible fixed default value for the attribute
5797 *
5798 * DEPRECATED: Internal function, don't use.
5799 *
5800 * Parse an attribute default declaration
5801 *
5802 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5803 *
5804 * [ VC: Required Attribute ]
5805 * if the default declaration is the keyword #REQUIRED, then the
5806 * attribute must be specified for all elements of the type in the
5807 * attribute-list declaration.
5808 *
5809 * [ VC: Attribute Default Legal ]
5810 * The declared default value must meet the lexical constraints of
5811 * the declared attribute type c.f. xmlValidateAttributeDecl()
5812 *
5813 * [ VC: Fixed Attribute Default ]
5814 * if an attribute has a default value declared with the #FIXED
5815 * keyword, instances of that attribute must match the default value.
5816 *
5817 * [ WFC: No < in Attribute Values ]
5818 * handled in xmlParseAttValue()
5819 *
5820 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5821 * or XML_ATTRIBUTE_FIXED.
5822 */
5823
5824 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5825 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5826 int val;
5827 xmlChar *ret;
5828
5829 *value = NULL;
5830 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5831 SKIP(9);
5832 return(XML_ATTRIBUTE_REQUIRED);
5833 }
5834 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5835 SKIP(8);
5836 return(XML_ATTRIBUTE_IMPLIED);
5837 }
5838 val = XML_ATTRIBUTE_NONE;
5839 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5840 SKIP(6);
5841 val = XML_ATTRIBUTE_FIXED;
5842 if (SKIP_BLANKS == 0) {
5843 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5844 "Space required after '#FIXED'\n");
5845 }
5846 }
5847 ret = xmlParseAttValue(ctxt);
5848 ctxt->instate = XML_PARSER_DTD;
5849 if (ret == NULL) {
5850 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5851 "Attribute default value declaration error\n");
5852 } else
5853 *value = ret;
5854 return(val);
5855 }
5856
5857 /**
5858 * xmlParseNotationType:
5859 * @ctxt: an XML parser context
5860 *
5861 * DEPRECATED: Internal function, don't use.
5862 *
5863 * parse an Notation attribute type.
5864 *
5865 * Note: the leading 'NOTATION' S part has already being parsed...
5866 *
5867 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5868 *
5869 * [ VC: Notation Attributes ]
5870 * Values of this type must match one of the notation names included
5871 * in the declaration; all notation names in the declaration must be declared.
5872 *
5873 * Returns: the notation attribute tree built while parsing
5874 */
5875
5876 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5877 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5878 const xmlChar *name;
5879 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5880
5881 if (RAW != '(') {
5882 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5883 return(NULL);
5884 }
5885 do {
5886 NEXT;
5887 SKIP_BLANKS;
5888 name = xmlParseName(ctxt);
5889 if (name == NULL) {
5890 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5891 "Name expected in NOTATION declaration\n");
5892 xmlFreeEnumeration(ret);
5893 return(NULL);
5894 }
5895 tmp = ret;
5896 while (tmp != NULL) {
5897 if (xmlStrEqual(name, tmp->name)) {
5898 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 "standalone: attribute notation value token %s duplicated\n",
5900 name, NULL);
5901 if (!xmlDictOwns(ctxt->dict, name))
5902 xmlFree((xmlChar *) name);
5903 break;
5904 }
5905 tmp = tmp->next;
5906 }
5907 if (tmp == NULL) {
5908 cur = xmlCreateEnumeration(name);
5909 if (cur == NULL) {
5910 xmlFreeEnumeration(ret);
5911 return(NULL);
5912 }
5913 if (last == NULL) ret = last = cur;
5914 else {
5915 last->next = cur;
5916 last = cur;
5917 }
5918 }
5919 SKIP_BLANKS;
5920 } while (RAW == '|');
5921 if (RAW != ')') {
5922 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5923 xmlFreeEnumeration(ret);
5924 return(NULL);
5925 }
5926 NEXT;
5927 return(ret);
5928 }
5929
5930 /**
5931 * xmlParseEnumerationType:
5932 * @ctxt: an XML parser context
5933 *
5934 * DEPRECATED: Internal function, don't use.
5935 *
5936 * parse an Enumeration attribute type.
5937 *
5938 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5939 *
5940 * [ VC: Enumeration ]
5941 * Values of this type must match one of the Nmtoken tokens in
5942 * the declaration
5943 *
5944 * Returns: the enumeration attribute tree built while parsing
5945 */
5946
5947 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5948 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5949 xmlChar *name;
5950 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5951
5952 if (RAW != '(') {
5953 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5954 return(NULL);
5955 }
5956 do {
5957 NEXT;
5958 SKIP_BLANKS;
5959 name = xmlParseNmtoken(ctxt);
5960 if (name == NULL) {
5961 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5962 return(ret);
5963 }
5964 tmp = ret;
5965 while (tmp != NULL) {
5966 if (xmlStrEqual(name, tmp->name)) {
5967 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5968 "standalone: attribute enumeration value token %s duplicated\n",
5969 name, NULL);
5970 if (!xmlDictOwns(ctxt->dict, name))
5971 xmlFree(name);
5972 break;
5973 }
5974 tmp = tmp->next;
5975 }
5976 if (tmp == NULL) {
5977 cur = xmlCreateEnumeration(name);
5978 if (!xmlDictOwns(ctxt->dict, name))
5979 xmlFree(name);
5980 if (cur == NULL) {
5981 xmlFreeEnumeration(ret);
5982 return(NULL);
5983 }
5984 if (last == NULL) ret = last = cur;
5985 else {
5986 last->next = cur;
5987 last = cur;
5988 }
5989 }
5990 SKIP_BLANKS;
5991 } while (RAW == '|');
5992 if (RAW != ')') {
5993 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5994 return(ret);
5995 }
5996 NEXT;
5997 return(ret);
5998 }
5999
6000 /**
6001 * xmlParseEnumeratedType:
6002 * @ctxt: an XML parser context
6003 * @tree: the enumeration tree built while parsing
6004 *
6005 * DEPRECATED: Internal function, don't use.
6006 *
6007 * parse an Enumerated attribute type.
6008 *
6009 * [57] EnumeratedType ::= NotationType | Enumeration
6010 *
6011 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6012 *
6013 *
6014 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6015 */
6016
6017 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6018 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6019 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6020 SKIP(8);
6021 if (SKIP_BLANKS == 0) {
6022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 "Space required after 'NOTATION'\n");
6024 return(0);
6025 }
6026 *tree = xmlParseNotationType(ctxt);
6027 if (*tree == NULL) return(0);
6028 return(XML_ATTRIBUTE_NOTATION);
6029 }
6030 *tree = xmlParseEnumerationType(ctxt);
6031 if (*tree == NULL) return(0);
6032 return(XML_ATTRIBUTE_ENUMERATION);
6033 }
6034
6035 /**
6036 * xmlParseAttributeType:
6037 * @ctxt: an XML parser context
6038 * @tree: the enumeration tree built while parsing
6039 *
6040 * DEPRECATED: Internal function, don't use.
6041 *
6042 * parse the Attribute list def for an element
6043 *
6044 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6045 *
6046 * [55] StringType ::= 'CDATA'
6047 *
6048 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6049 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6050 *
6051 * Validity constraints for attribute values syntax are checked in
6052 * xmlValidateAttributeValue()
6053 *
6054 * [ VC: ID ]
6055 * Values of type ID must match the Name production. A name must not
6056 * appear more than once in an XML document as a value of this type;
6057 * i.e., ID values must uniquely identify the elements which bear them.
6058 *
6059 * [ VC: One ID per Element Type ]
6060 * No element type may have more than one ID attribute specified.
6061 *
6062 * [ VC: ID Attribute Default ]
6063 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6064 *
6065 * [ VC: IDREF ]
6066 * Values of type IDREF must match the Name production, and values
6067 * of type IDREFS must match Names; each IDREF Name must match the value
6068 * of an ID attribute on some element in the XML document; i.e. IDREF
6069 * values must match the value of some ID attribute.
6070 *
6071 * [ VC: Entity Name ]
6072 * Values of type ENTITY must match the Name production, values
6073 * of type ENTITIES must match Names; each Entity Name must match the
6074 * name of an unparsed entity declared in the DTD.
6075 *
6076 * [ VC: Name Token ]
6077 * Values of type NMTOKEN must match the Nmtoken production; values
6078 * of type NMTOKENS must match Nmtokens.
6079 *
6080 * Returns the attribute type
6081 */
6082 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6083 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6084 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6085 SKIP(5);
6086 return(XML_ATTRIBUTE_CDATA);
6087 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6088 SKIP(6);
6089 return(XML_ATTRIBUTE_IDREFS);
6090 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6091 SKIP(5);
6092 return(XML_ATTRIBUTE_IDREF);
6093 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6094 SKIP(2);
6095 return(XML_ATTRIBUTE_ID);
6096 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6097 SKIP(6);
6098 return(XML_ATTRIBUTE_ENTITY);
6099 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6100 SKIP(8);
6101 return(XML_ATTRIBUTE_ENTITIES);
6102 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6103 SKIP(8);
6104 return(XML_ATTRIBUTE_NMTOKENS);
6105 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6106 SKIP(7);
6107 return(XML_ATTRIBUTE_NMTOKEN);
6108 }
6109 return(xmlParseEnumeratedType(ctxt, tree));
6110 }
6111
6112 /**
6113 * xmlParseAttributeListDecl:
6114 * @ctxt: an XML parser context
6115 *
6116 * DEPRECATED: Internal function, don't use.
6117 *
6118 * Parse an attribute list declaration for an element. Always consumes '<!'.
6119 *
6120 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6121 *
6122 * [53] AttDef ::= S Name S AttType S DefaultDecl
6123 *
6124 */
6125 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6126 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6127 const xmlChar *elemName;
6128 const xmlChar *attrName;
6129 xmlEnumerationPtr tree;
6130
6131 if ((CUR != '<') || (NXT(1) != '!'))
6132 return;
6133 SKIP(2);
6134
6135 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6136 int inputid = ctxt->input->id;
6137
6138 SKIP(7);
6139 if (SKIP_BLANKS == 0) {
6140 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6141 "Space required after '<!ATTLIST'\n");
6142 }
6143 elemName = xmlParseName(ctxt);
6144 if (elemName == NULL) {
6145 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6146 "ATTLIST: no name for Element\n");
6147 return;
6148 }
6149 SKIP_BLANKS;
6150 GROW;
6151 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6152 int type;
6153 int def;
6154 xmlChar *defaultValue = NULL;
6155
6156 GROW;
6157 tree = NULL;
6158 attrName = xmlParseName(ctxt);
6159 if (attrName == NULL) {
6160 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6161 "ATTLIST: no name for Attribute\n");
6162 break;
6163 }
6164 GROW;
6165 if (SKIP_BLANKS == 0) {
6166 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6167 "Space required after the attribute name\n");
6168 break;
6169 }
6170
6171 type = xmlParseAttributeType(ctxt, &tree);
6172 if (type <= 0) {
6173 break;
6174 }
6175
6176 GROW;
6177 if (SKIP_BLANKS == 0) {
6178 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6179 "Space required after the attribute type\n");
6180 if (tree != NULL)
6181 xmlFreeEnumeration(tree);
6182 break;
6183 }
6184
6185 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6186 if (def <= 0) {
6187 if (defaultValue != NULL)
6188 xmlFree(defaultValue);
6189 if (tree != NULL)
6190 xmlFreeEnumeration(tree);
6191 break;
6192 }
6193 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6194 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6195
6196 GROW;
6197 if (RAW != '>') {
6198 if (SKIP_BLANKS == 0) {
6199 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6200 "Space required after the attribute default value\n");
6201 if (defaultValue != NULL)
6202 xmlFree(defaultValue);
6203 if (tree != NULL)
6204 xmlFreeEnumeration(tree);
6205 break;
6206 }
6207 }
6208 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6209 (ctxt->sax->attributeDecl != NULL))
6210 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6211 type, def, defaultValue, tree);
6212 else if (tree != NULL)
6213 xmlFreeEnumeration(tree);
6214
6215 if ((ctxt->sax2) && (defaultValue != NULL) &&
6216 (def != XML_ATTRIBUTE_IMPLIED) &&
6217 (def != XML_ATTRIBUTE_REQUIRED)) {
6218 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6219 }
6220 if (ctxt->sax2) {
6221 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6222 }
6223 if (defaultValue != NULL)
6224 xmlFree(defaultValue);
6225 GROW;
6226 }
6227 if (RAW == '>') {
6228 if (inputid != ctxt->input->id) {
6229 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6230 "Attribute list declaration doesn't start and"
6231 " stop in the same entity\n");
6232 }
6233 NEXT;
6234 }
6235 }
6236 }
6237
6238 /**
6239 * xmlParseElementMixedContentDecl:
6240 * @ctxt: an XML parser context
6241 * @inputchk: the input used for the current entity, needed for boundary checks
6242 *
6243 * DEPRECATED: Internal function, don't use.
6244 *
6245 * parse the declaration for a Mixed Element content
6246 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6247 *
6248 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6249 * '(' S? '#PCDATA' S? ')'
6250 *
6251 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6252 *
6253 * [ VC: No Duplicate Types ]
6254 * The same name must not appear more than once in a single
6255 * mixed-content declaration.
6256 *
6257 * returns: the list of the xmlElementContentPtr describing the element choices
6258 */
6259 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6260 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6261 xmlElementContentPtr ret = NULL, cur = NULL, n;
6262 const xmlChar *elem = NULL;
6263
6264 GROW;
6265 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6266 SKIP(7);
6267 SKIP_BLANKS;
6268 if (RAW == ')') {
6269 if (ctxt->input->id != inputchk) {
6270 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6271 "Element content declaration doesn't start and"
6272 " stop in the same entity\n");
6273 }
6274 NEXT;
6275 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6276 if (ret == NULL)
6277 return(NULL);
6278 if (RAW == '*') {
6279 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6280 NEXT;
6281 }
6282 return(ret);
6283 }
6284 if ((RAW == '(') || (RAW == '|')) {
6285 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6286 if (ret == NULL) return(NULL);
6287 }
6288 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6289 NEXT;
6290 if (elem == NULL) {
6291 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6292 if (ret == NULL) {
6293 xmlFreeDocElementContent(ctxt->myDoc, cur);
6294 return(NULL);
6295 }
6296 ret->c1 = cur;
6297 if (cur != NULL)
6298 cur->parent = ret;
6299 cur = ret;
6300 } else {
6301 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6302 if (n == NULL) {
6303 xmlFreeDocElementContent(ctxt->myDoc, ret);
6304 return(NULL);
6305 }
6306 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6307 if (n->c1 != NULL)
6308 n->c1->parent = n;
6309 cur->c2 = n;
6310 if (n != NULL)
6311 n->parent = cur;
6312 cur = n;
6313 }
6314 SKIP_BLANKS;
6315 elem = xmlParseName(ctxt);
6316 if (elem == NULL) {
6317 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6318 "xmlParseElementMixedContentDecl : Name expected\n");
6319 xmlFreeDocElementContent(ctxt->myDoc, ret);
6320 return(NULL);
6321 }
6322 SKIP_BLANKS;
6323 GROW;
6324 }
6325 if ((RAW == ')') && (NXT(1) == '*')) {
6326 if (elem != NULL) {
6327 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6328 XML_ELEMENT_CONTENT_ELEMENT);
6329 if (cur->c2 != NULL)
6330 cur->c2->parent = cur;
6331 }
6332 if (ret != NULL)
6333 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6334 if (ctxt->input->id != inputchk) {
6335 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6336 "Element content declaration doesn't start and"
6337 " stop in the same entity\n");
6338 }
6339 SKIP(2);
6340 } else {
6341 xmlFreeDocElementContent(ctxt->myDoc, ret);
6342 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6343 return(NULL);
6344 }
6345
6346 } else {
6347 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6348 }
6349 return(ret);
6350 }
6351
/**
 * xmlParseElementChildrenContentDeclPriv:
 * @ctxt:  an XML parser context
 * @inputchk:  the input used for the current entity, needed for boundary checks
 * @depth: the level of recursion
 *
 * Parse the declaration of an element children content (one parenthesized
 * choice or sequence group), recursing for nested groups.
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 * (or by the recursive caller for a nested group); the closing ')' and an
 * optional occurrence indicator are consumed here.
 *
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
 *
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 *
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
 *
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
 *
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
 * TODO Parameter-entity replacement text must be properly nested
 *	with parenthesized groups. That is to say, if either of the
 *	opening or closing parentheses in a choice, seq, or Mixed
 *	construct is contained in the replacement text for a parameter
 *	entity, both must be contained in the same replacement text. For
 *	interoperability, if a parameter-entity reference appears in a
 *	choice, seq, or Mixed construct, its replacement text should not
 *	be empty, and neither the first nor last non-blank character of
 *	the replacement text should be a connector (| or ,).
 *
 * The recursion is refused beyond a depth of 128, or 2048 when the
 * XML_PARSE_HUGE option is set.
 *
 * Returns the tree of xmlElementContentPtr describing the element
 *          hierarchy, or NULL on error (the nodes built so far are freed
 *          on the error paths).
 */
static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
                                       int depth) {
    /*
     * ret:  root of the tree returned to the caller
     * cur:  operator node whose c2 slot is still to be filled
     * last: most recently parsed operand, not yet attached to the tree
     * op:   operator node being created for the current separator
     */
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
    const xmlChar *elem;
    /* Separator of this group (',' or '|'), 0 until the first one is seen */
    xmlChar type = 0;

    /* Guard against runaway recursion on deeply nested groups */
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (depth > 2048)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
                          depth);
        return(NULL);
    }
    SKIP_BLANKS;
    GROW;
    /* First operand: either a nested group or a plain element name */
    if (RAW == '(') {
        int inputid = ctxt->input->id;

        /* Recurse on first child */
        NEXT;
        SKIP_BLANKS;
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                           depth + 1);
        if (cur == NULL)
            return(NULL);
        SKIP_BLANKS;
        GROW;
    } else {
        elem = xmlParseName(ctxt);
        if (elem == NULL) {
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
            return(NULL);
        }
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
        if (cur == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        GROW;
        /* Optional occurrence indicator directly after the name */
        if (RAW == '?') {
            cur->ocur = XML_ELEMENT_CONTENT_OPT;
            NEXT;
        } else if (RAW == '*') {
            cur->ocur = XML_ELEMENT_CONTENT_MULT;
            NEXT;
        } else if (RAW == '+') {
            cur->ocur = XML_ELEMENT_CONTENT_PLUS;
            NEXT;
        } else {
            cur->ocur = XML_ELEMENT_CONTENT_ONCE;
        }
        GROW;
    }
    SKIP_BLANKS;
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Each loop we parse one separator and one element.
         */
        if (RAW == ',') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name | Name , Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                /* last is freed separately: it is not linked into ret yet */
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (last == NULL) {
                /* First separator: the operator becomes the new root */
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                /*
                 * Later separator: chain the operator on the right of the
                 * previous one and move the pending operand under it.
                 */
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else if (RAW == '|') {
            if (type == 0) type = CUR;

            /*
             * Detect "Name , Name | Name" error
             */
            else if (type != CUR) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
                    "xmlParseElementChildrenContentDecl : '%c' expected\n",
                                  type);
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            NEXT;

            op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
            if (op == NULL) {
                if ((last != NULL) && (last != ret))
                    xmlFreeDocElementContent(ctxt->myDoc, last);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            /* Same linking scheme as for the ',' separator above */
            if (last == NULL) {
                op->c1 = ret;
                if (ret != NULL)
                    ret->parent = op;
                ret = cur = op;
            } else {
                cur->c2 = op;
                if (op != NULL)
                    op->parent = cur;
                op->c1 = last;
                if (last != NULL)
                    last->parent = op;
                cur =op;
                last = NULL;
            }
        } else {
            /* Neither ')' nor a separator: the group is malformed */
            xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
            if ((last != NULL) && (last != ret))
                xmlFreeDocElementContent(ctxt->myDoc, last);
            if (ret != NULL)
                xmlFreeDocElementContent(ctxt->myDoc, ret);
            return(NULL);
        }
        GROW;
        SKIP_BLANKS;
        GROW;
        /* Operand following the separator; kept in last until linked */
        if (RAW == '(') {
            int inputid = ctxt->input->id;
            /* Recurse on second child */
            NEXT;
            SKIP_BLANKS;
            last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
                                                          depth + 1);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            SKIP_BLANKS;
        } else {
            elem = xmlParseName(ctxt);
            if (elem == NULL) {
                xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
            if (last == NULL) {
                if (ret != NULL)
                    xmlFreeDocElementContent(ctxt->myDoc, ret);
                return(NULL);
            }
            if (RAW == '?') {
                last->ocur = XML_ELEMENT_CONTENT_OPT;
                NEXT;
            } else if (RAW == '*') {
                last->ocur = XML_ELEMENT_CONTENT_MULT;
                NEXT;
            } else if (RAW == '+') {
                last->ocur = XML_ELEMENT_CONTENT_PLUS;
                NEXT;
            } else {
                last->ocur = XML_ELEMENT_CONTENT_ONCE;
            }
        }
        SKIP_BLANKS;
        GROW;
    }
    /* Attach the final pending operand to the last operator node */
    if ((cur != NULL) && (last != NULL)) {
        cur->c2 = last;
        if (last != NULL)
            last->parent = cur;
    }
    if (ctxt->input->id != inputchk) {
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                       "Element content declaration doesn't start and stop in"
                       " the same entity\n");
    }
    /* Consume the closing ')' then the group's occurrence indicator */
    NEXT;
    if (RAW == '?') {
        if (ret != NULL) {
            /* '?' applied on top of an existing '+' or '*' gives '*' */
            if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_OPT;
        }
        NEXT;
    } else if (RAW == '*') {
        if (ret != NULL) {
            ret->ocur = XML_ELEMENT_CONTENT_MULT;
            cur = ret;
            /*
             * Some normalization:
             * (a | b* | c?)* == (a | b | c)*
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                cur = cur->c2;
            }
        }
        NEXT;
    } else if (RAW == '+') {
        if (ret != NULL) {
            int found = 0;

            /* '+' applied on top of an existing '?' or '*' gives '*' */
            if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
                (ret->ocur == XML_ELEMENT_CONTENT_MULT))
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
            else
                ret->ocur = XML_ELEMENT_CONTENT_PLUS;
            /*
             * Some normalization:
             * (a | b*)+ == (a | b)*
             * (a | b?)+ == (a | b)*
             */
            /*
             * NOTE(review): unlike the '*' branch, cur is not reset to ret
             * before this walk, so it starts from the last operator node
             * created in the loop above rather than from the root. Confirm
             * whether that is intended for choices of three or more items.
             */
            while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
                if ((cur->c1 != NULL) &&
                    ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                if ((cur->c2 != NULL) &&
                    ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
                     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
                    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
                    found = 1;
                }
                cur = cur->c2;
            }
            /* An optional alternative makes the whole group optional too */
            if (found)
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
        }
        NEXT;
    }
    return(ret);
}
6655
6656 /**
6657 * xmlParseElementChildrenContentDecl:
6658 * @ctxt: an XML parser context
6659 * @inputchk: the input used for the current entity, needed for boundary checks
6660 *
6661 * DEPRECATED: Internal function, don't use.
6662 *
6663 * parse the declaration for a Mixed Element content
6664 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6665 *
6666 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6667 *
6668 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6669 *
6670 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6671 *
6672 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6673 *
6674 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6675 * TODO Parameter-entity replacement text must be properly nested
6676 * with parenthesized groups. That is to say, if either of the
6677 * opening or closing parentheses in a choice, seq, or Mixed
6678 * construct is contained in the replacement text for a parameter
6679 * entity, both must be contained in the same replacement text. For
6680 * interoperability, if a parameter-entity reference appears in a
6681 * choice, seq, or Mixed construct, its replacement text should not
6682 * be empty, and neither the first nor last non-blank character of
6683 * the replacement text should be a connector (| or ,).
6684 *
6685 * Returns the tree of xmlElementContentPtr describing the element
6686 * hierarchy.
6687 */
6688 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6689 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6690 /* stub left for API/ABI compat */
6691 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6692 }
6693
6694 /**
6695 * xmlParseElementContentDecl:
6696 * @ctxt: an XML parser context
6697 * @name: the name of the element being defined.
6698 * @result: the Element Content pointer will be stored here if any
6699 *
6700 * DEPRECATED: Internal function, don't use.
6701 *
6702 * parse the declaration for an Element content either Mixed or Children,
6703 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6704 *
6705 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6706 *
6707 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6708 */
6709
6710 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6711 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6712 xmlElementContentPtr *result) {
6713
6714 xmlElementContentPtr tree = NULL;
6715 int inputid = ctxt->input->id;
6716 int res;
6717
6718 *result = NULL;
6719
6720 if (RAW != '(') {
6721 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6722 "xmlParseElementContentDecl : %s '(' expected\n", name);
6723 return(-1);
6724 }
6725 NEXT;
6726 GROW;
6727 if (ctxt->instate == XML_PARSER_EOF)
6728 return(-1);
6729 SKIP_BLANKS;
6730 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6731 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6732 res = XML_ELEMENT_TYPE_MIXED;
6733 } else {
6734 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6735 res = XML_ELEMENT_TYPE_ELEMENT;
6736 }
6737 SKIP_BLANKS;
6738 *result = tree;
6739 return(res);
6740 }
6741
6742 /**
6743 * xmlParseElementDecl:
6744 * @ctxt: an XML parser context
6745 *
6746 * DEPRECATED: Internal function, don't use.
6747 *
6748 * Parse an element declaration. Always consumes '<!'.
6749 *
6750 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6751 *
6752 * [ VC: Unique Element Type Declaration ]
6753 * No element type may be declared more than once
6754 *
6755 * Returns the type of the element, or -1 in case of error
6756 */
6757 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6758 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6759 const xmlChar *name;
6760 int ret = -1;
6761 xmlElementContentPtr content = NULL;
6762
6763 if ((CUR != '<') || (NXT(1) != '!'))
6764 return(ret);
6765 SKIP(2);
6766
6767 /* GROW; done in the caller */
6768 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6769 int inputid = ctxt->input->id;
6770
6771 SKIP(7);
6772 if (SKIP_BLANKS == 0) {
6773 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6774 "Space required after 'ELEMENT'\n");
6775 return(-1);
6776 }
6777 name = xmlParseName(ctxt);
6778 if (name == NULL) {
6779 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6780 "xmlParseElementDecl: no name for Element\n");
6781 return(-1);
6782 }
6783 if (SKIP_BLANKS == 0) {
6784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6785 "Space required after the element name\n");
6786 }
6787 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6788 SKIP(5);
6789 /*
6790 * Element must always be empty.
6791 */
6792 ret = XML_ELEMENT_TYPE_EMPTY;
6793 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6794 (NXT(2) == 'Y')) {
6795 SKIP(3);
6796 /*
6797 * Element is a generic container.
6798 */
6799 ret = XML_ELEMENT_TYPE_ANY;
6800 } else if (RAW == '(') {
6801 ret = xmlParseElementContentDecl(ctxt, name, &content);
6802 } else {
6803 /*
6804 * [ WFC: PEs in Internal Subset ] error handling.
6805 */
6806 if ((RAW == '%') && (ctxt->external == 0) &&
6807 (ctxt->inputNr == 1)) {
6808 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6809 "PEReference: forbidden within markup decl in internal subset\n");
6810 } else {
6811 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6812 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6813 }
6814 return(-1);
6815 }
6816
6817 SKIP_BLANKS;
6818
6819 if (RAW != '>') {
6820 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6821 if (content != NULL) {
6822 xmlFreeDocElementContent(ctxt->myDoc, content);
6823 }
6824 } else {
6825 if (inputid != ctxt->input->id) {
6826 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6827 "Element declaration doesn't start and stop in"
6828 " the same entity\n");
6829 }
6830
6831 NEXT;
6832 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6833 (ctxt->sax->elementDecl != NULL)) {
6834 if (content != NULL)
6835 content->parent = NULL;
6836 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6837 content);
6838 if ((content != NULL) && (content->parent == NULL)) {
6839 /*
6840 * this is a trick: if xmlAddElementDecl is called,
6841 * instead of copying the full tree it is plugged directly
6842 * if called from the parser. Avoid duplicating the
6843 * interfaces or change the API/ABI
6844 */
6845 xmlFreeDocElementContent(ctxt->myDoc, content);
6846 }
6847 } else if (content != NULL) {
6848 xmlFreeDocElementContent(ctxt->myDoc, content);
6849 }
6850 }
6851 }
6852 return(ret);
6853 }
6854
6855 /**
6856 * xmlParseConditionalSections
6857 * @ctxt: an XML parser context
6858 *
6859 * Parse a conditional section. Always consumes '<!['.
6860 *
6861 * [61] conditionalSect ::= includeSect | ignoreSect
6862 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6863 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6864 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6865 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6866 */
6867
6868 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6869 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6870 int *inputIds = NULL;
6871 size_t inputIdsSize = 0;
6872 size_t depth = 0;
6873
6874 while (ctxt->instate != XML_PARSER_EOF) {
6875 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6876 int id = ctxt->input->id;
6877
6878 SKIP(3);
6879 SKIP_BLANKS;
6880
6881 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6882 SKIP(7);
6883 SKIP_BLANKS;
6884 if (RAW != '[') {
6885 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6886 xmlHaltParser(ctxt);
6887 goto error;
6888 }
6889 if (ctxt->input->id != id) {
6890 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6891 "All markup of the conditional section is"
6892 " not in the same entity\n");
6893 }
6894 NEXT;
6895
6896 if (inputIdsSize <= depth) {
6897 int *tmp;
6898
6899 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6900 tmp = (int *) xmlRealloc(inputIds,
6901 inputIdsSize * sizeof(int));
6902 if (tmp == NULL) {
6903 xmlErrMemory(ctxt, NULL);
6904 goto error;
6905 }
6906 inputIds = tmp;
6907 }
6908 inputIds[depth] = id;
6909 depth++;
6910 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6911 size_t ignoreDepth = 0;
6912
6913 SKIP(6);
6914 SKIP_BLANKS;
6915 if (RAW != '[') {
6916 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6917 xmlHaltParser(ctxt);
6918 goto error;
6919 }
6920 if (ctxt->input->id != id) {
6921 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6922 "All markup of the conditional section is"
6923 " not in the same entity\n");
6924 }
6925 NEXT;
6926
6927 while (RAW != 0) {
6928 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6929 SKIP(3);
6930 ignoreDepth++;
6931 /* Check for integer overflow */
6932 if (ignoreDepth == 0) {
6933 xmlErrMemory(ctxt, NULL);
6934 goto error;
6935 }
6936 } else if ((RAW == ']') && (NXT(1) == ']') &&
6937 (NXT(2) == '>')) {
6938 if (ignoreDepth == 0)
6939 break;
6940 SKIP(3);
6941 ignoreDepth--;
6942 } else {
6943 NEXT;
6944 }
6945 }
6946
6947 if (RAW == 0) {
6948 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6949 goto error;
6950 }
6951 if (ctxt->input->id != id) {
6952 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6953 "All markup of the conditional section is"
6954 " not in the same entity\n");
6955 }
6956 SKIP(3);
6957 } else {
6958 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6959 xmlHaltParser(ctxt);
6960 goto error;
6961 }
6962 } else if ((depth > 0) &&
6963 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6964 depth--;
6965 if (ctxt->input->id != inputIds[depth]) {
6966 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6967 "All markup of the conditional section is not"
6968 " in the same entity\n");
6969 }
6970 SKIP(3);
6971 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6972 xmlParseMarkupDecl(ctxt);
6973 } else {
6974 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6975 xmlHaltParser(ctxt);
6976 goto error;
6977 }
6978
6979 if (depth == 0)
6980 break;
6981
6982 SKIP_BLANKS;
6983 SHRINK;
6984 GROW;
6985 }
6986
6987 error:
6988 xmlFree(inputIds);
6989 }
6990
6991 /**
6992 * xmlParseMarkupDecl:
6993 * @ctxt: an XML parser context
6994 *
6995 * DEPRECATED: Internal function, don't use.
6996 *
6997 * Parse markup declarations. Always consumes '<!' or '<?'.
6998 *
6999 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7000 * NotationDecl | PI | Comment
7001 *
7002 * [ VC: Proper Declaration/PE Nesting ]
7003 * Parameter-entity replacement text must be properly nested with
7004 * markup declarations. That is to say, if either the first character
7005 * or the last character of a markup declaration (markupdecl above) is
7006 * contained in the replacement text for a parameter-entity reference,
7007 * both must be contained in the same replacement text.
7008 *
7009 * [ WFC: PEs in Internal Subset ]
7010 * In the internal DTD subset, parameter-entity references can occur
7011 * only where markup declarations can occur, not within markup declarations.
7012 * (This does not apply to references that occur in external parameter
7013 * entities or to the external subset.)
7014 */
7015 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7016 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7017 GROW;
7018 if (CUR == '<') {
7019 if (NXT(1) == '!') {
7020 switch (NXT(2)) {
7021 case 'E':
7022 if (NXT(3) == 'L')
7023 xmlParseElementDecl(ctxt);
7024 else if (NXT(3) == 'N')
7025 xmlParseEntityDecl(ctxt);
7026 else
7027 SKIP(2);
7028 break;
7029 case 'A':
7030 xmlParseAttributeListDecl(ctxt);
7031 break;
7032 case 'N':
7033 xmlParseNotationDecl(ctxt);
7034 break;
7035 case '-':
7036 xmlParseComment(ctxt);
7037 break;
7038 default:
7039 /* there is an error but it will be detected later */
7040 SKIP(2);
7041 break;
7042 }
7043 } else if (NXT(1) == '?') {
7044 xmlParsePI(ctxt);
7045 }
7046 }
7047
7048 /*
7049 * detect requirement to exit there and act accordingly
7050 * and avoid having instate overridden later on
7051 */
7052 if (ctxt->instate == XML_PARSER_EOF)
7053 return;
7054
7055 ctxt->instate = XML_PARSER_DTD;
7056 }
7057
7058 /**
7059 * xmlParseTextDecl:
7060 * @ctxt: an XML parser context
7061 *
7062 * DEPRECATED: Internal function, don't use.
7063 *
7064 * parse an XML declaration header for external entities
7065 *
7066 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7067 */
7068
7069 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7070 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7071 xmlChar *version;
7072 int oldstate;
7073
7074 /*
7075 * We know that '<?xml' is here.
7076 */
7077 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7078 SKIP(5);
7079 } else {
7080 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7081 return;
7082 }
7083
7084 /* Avoid expansion of parameter entities when skipping blanks. */
7085 oldstate = ctxt->instate;
7086 ctxt->instate = XML_PARSER_START;
7087
7088 if (SKIP_BLANKS == 0) {
7089 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7090 "Space needed after '<?xml'\n");
7091 }
7092
7093 /*
7094 * We may have the VersionInfo here.
7095 */
7096 version = xmlParseVersionInfo(ctxt);
7097 if (version == NULL)
7098 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7099 else {
7100 if (SKIP_BLANKS == 0) {
7101 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7102 "Space needed here\n");
7103 }
7104 }
7105 ctxt->input->version = version;
7106
7107 /*
7108 * We must have the encoding declaration
7109 */
7110 xmlParseEncodingDecl(ctxt);
7111 if (ctxt->instate == XML_PARSER_EOF)
7112 return;
7113 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7114 /*
7115 * The XML REC instructs us to stop parsing right here
7116 */
7117 ctxt->instate = oldstate;
7118 return;
7119 }
7120
7121 SKIP_BLANKS;
7122 if ((RAW == '?') && (NXT(1) == '>')) {
7123 SKIP(2);
7124 } else if (RAW == '>') {
7125 /* Deprecated old WD ... */
7126 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7127 NEXT;
7128 } else {
7129 int c;
7130
7131 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7132 while ((c = CUR) != 0) {
7133 NEXT;
7134 if (c == '>')
7135 break;
7136 }
7137 if (ctxt->instate == XML_PARSER_EOF)
7138 return;
7139 }
7140
7141 ctxt->instate = oldstate;
7142 }
7143
7144 /**
7145 * xmlParseExternalSubset:
7146 * @ctxt: an XML parser context
7147 * @ExternalID: the external identifier
7148 * @SystemID: the system identifier (or URL)
7149 *
7150 * parse Markup declarations from an external subset
7151 *
7152 * [30] extSubset ::= textDecl? extSubsetDecl
7153 *
7154 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7155 */
7156 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7157 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7158 const xmlChar *SystemID) {
7159 xmlDetectSAX2(ctxt);
7160
7161 xmlDetectEncoding(ctxt);
7162
7163 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7164 xmlParseTextDecl(ctxt);
7165 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7166 /*
7167 * The XML REC instructs us to stop parsing right here
7168 */
7169 xmlHaltParser(ctxt);
7170 return;
7171 }
7172 }
7173 if (ctxt->myDoc == NULL) {
7174 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7175 if (ctxt->myDoc == NULL) {
7176 xmlErrMemory(ctxt, "New Doc failed");
7177 return;
7178 }
7179 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7180 }
7181 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7182 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7183
7184 ctxt->instate = XML_PARSER_DTD;
7185 ctxt->external = 1;
7186 SKIP_BLANKS;
7187 while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7188 GROW;
7189 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7190 xmlParseConditionalSections(ctxt);
7191 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7192 xmlParseMarkupDecl(ctxt);
7193 } else {
7194 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7195 xmlHaltParser(ctxt);
7196 return;
7197 }
7198 SKIP_BLANKS;
7199 SHRINK;
7200 }
7201
7202 if (RAW != 0) {
7203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7204 }
7205
7206 }
7207
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse and handle a reference in content. Depending on the SAX
 * interface and the parser options this ends up in one of:
 *   - a characters() callback, for a CharRef or a predefined entity;
 *   - a reference() callback, when entities are not substituted;
 *   - the entity content being parsed and, when building a tree with
 *     entity substitution, inserted or copied under the current node.
 *
 * Returns immediately if the current character is not '&'.
 *
 * Always consumes '&'.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent;
    xmlChar *val;
    int was_checked;
    xmlNodePtr list = NULL;
    xmlParserErrors ret = XML_ERR_OK;


    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        int value = xmlParseCharRef(ctxt);

        /* A value of 0 is treated as "nothing to deliver". */
        if (value == 0)
            return;

        /*
         * Just encode the value in UTF-8
         */
        COPY_BUF(out, i, value);
        out[i] = 0;
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, out, i);
        return;
    }

    /*
     * We are seeing an entity reference
     */
    ent = xmlParseEntityRef(ctxt);
    if (ent == NULL) return;
    if (!ctxt->wellFormed)
        return;
    /*
     * Non-zero if the entity content had already been parsed before this
     * reference was met (XML_ENT_PARSED is set further down once the
     * first parse has been done).
     */
    was_checked = ent->flags & XML_ENT_PARSED;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * The first reference to the entity trigger a parsing phase
     * where the ent->children is filled with the result from
     * the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     *
     * FIXME: This doesn't work correctly since entities can be
     * expanded with different namespace declarations in scope.
     * For example:
     *
     * <!DOCTYPE doc [
     *   <!ENTITY ent "<ns:elem/>">
     * ]>
     * <doc>
     *   <decl1 xmlns:ns="urn:ns1">
     *     &ent;
     *   </decl1>
     *   <decl2 xmlns:ns="urn:ns2">
     *     &ent;
     *   </decl2>
     * </doc>
     *
     * Proposed fix:
     *
     * - Remove the ent->owner optimization which tries to avoid the
     *   initial copy of the entity. Always make entities own the
     *   subtree.
     * - Ignore current namespace declarations when parsing the
     *   entity. If a prefix can't be resolved, don't report an error
     *   but mark it as unresolved.
     * - Try to resolve these prefixes when expanding the entity.
     *   This will require a specialized version of xmlStaticCopyNode
     *   which can also make use of the namespace hash table to avoid
     *   quadratic behavior.
     *
     * Alternatively, we could simply reparse the entity on each
     * expansion like we already do with custom SAX callbacks.
     * External entity content should be cached in this case.
     */
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
        /* Saved so the amplification check below can account for it. */
        unsigned long oldsizeentcopy = ctxt->sizeentcopy;

        /*
         * This is a bit hackish but this seems the best
         * way to make sure both SAX and DOM entity support
         * behaves okay.
         */
        void *user_data;
        if (ctxt->userData == ctxt)
            user_data = NULL;
        else
            user_data = ctxt->userData;

        /* Avoid overflow as much as possible */
        ctxt->sizeentcopy = 0;

        /*
         * The entity is already being expanded further up the call
         * chain: it references itself, directly or indirectly.
         */
        if (ent->flags & XML_ENT_EXPANDING) {
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
            xmlHaltParser(ctxt);
            return;
        }

        ent->flags |= XML_ENT_EXPANDING;

        /*
         * Check that this entity is well formed
         * 4.3.2: An internal general parsed entity is well-formed
         * if its replacement text matches the production labeled
         * content.
         */
        if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
            ctxt->depth++;
            ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
                                                      user_data, &list);
            ctxt->depth--;

        } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
            ctxt->depth++;
            ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
                                           user_data, ctxt->depth, ent->URI,
                                           ent->ExternalID, &list);
            ctxt->depth--;
        } else {
            ret = XML_ERR_ENTITY_PE_INTERNAL;
            xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                         "invalid entity type found\n", NULL);
        }

        /*
         * Parsing is over, successfully or not: clear the in-progress
         * marker and record that the entity has been parsed and checked,
         * together with the size accumulated in sizeentcopy meanwhile.
         */
        ent->flags &= ~XML_ENT_EXPANDING;
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
        ent->expandedSize = ctxt->sizeentcopy;
        if (ret == XML_ERR_ENTITY_LOOP) {
            xmlHaltParser(ctxt);
            xmlFreeNodeList(list);
            return;
        }
        if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
            xmlFreeNodeList(list);
            return;
        }

        if ((ret == XML_ERR_OK) && (list != NULL)) {
            ent->children = list;
            /*
             * Prune it directly in the generated document
             * except for single text nodes.
             */
            if ((ctxt->replaceEntities == 0) ||
                (ctxt->parseMode == XML_PARSE_READER) ||
                ((list->type == XML_TEXT_NODE) &&
                 (list->next == NULL))) {
                /*
                 * The entity keeps the parsed nodes: re-parent every
                 * top-level node to the entity and record the last one.
                 * list is reset to NULL afterwards, so the insertion
                 * code at the end of the function sees no fresh list.
                 */
                ent->owner = 1;
                while (list != NULL) {
                    list->parent = (xmlNodePtr) ent;
                    if (list->doc != ent->doc)
                        xmlSetTreeDoc(list, ent->doc);
                    if (list->next == NULL)
                        ent->last = list;
                    list = list->next;
                }
                list = NULL;
            } else {
                /*
                 * The nodes are destined for the document: parent them
                 * to the current node. list is left pointing at the
                 * first node so the code below can tell this is the
                 * first expansion.
                 */
                ent->owner = 0;
                while (list != NULL) {
                    list->parent = (xmlNodePtr) ctxt->node;
                    list->doc = ctxt->myDoc;
                    if (list->next == NULL)
                        ent->last = list;
                    list = list->next;
                }
                list = ent->children;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                    xmlAddEntityReference(ent, list, NULL);
#endif /* LIBXML_LEGACY_ENABLED */
            }
        } else if ((ret != XML_ERR_OK) &&
                   (ret != XML_WAR_UNDECLARED_ENTITY)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                     "Entity '%s' failed to parse\n", ent->name);
            /* Truncate the stored content of the broken entity. */
            if (ent->content != NULL)
                ent->content[0] = 0;
        } else if (list != NULL) {
            xmlFreeNodeList(list);
            list = NULL;
        }

        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
        was_checked = 0;
    }

    /*
     * Now that the entity content has been gathered
     * provide it to the application, this can take different forms based
     * on the parsing modes.
     */
    if (ent->children == NULL) {
        /*
         * Probably running in SAX mode and the callbacks don't
         * build the entity content. So unless we already went
         * though parsing for first checking go though the entity
         * content to generate callbacks associated to the entity
         */
        if (was_checked != 0) {
            void *user_data;
            /*
             * This is a bit hackish but this seems the best
             * way to make sure both SAX and DOM entity support
             * behaves okay.
             */
            if (ctxt->userData == ctxt)
                user_data = NULL;
            else
                user_data = ctxt->userData;

            /* Parse the content again, without collecting a node list. */
            if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
                ctxt->depth++;
                ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                   ent->content, user_data, NULL);
                ctxt->depth--;
            } else if (ent->etype ==
                       XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                unsigned long oldsizeentities = ctxt->sizeentities;

                ctxt->depth++;
                ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
                           ctxt->sax, user_data, ctxt->depth,
                           ent->URI, ent->ExternalID, NULL);
                ctxt->depth--;

                /* Undo the change to sizeentities */
                ctxt->sizeentities = oldsizeentities;
            } else {
                ret = XML_ERR_ENTITY_PE_INTERNAL;
                xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                             "invalid entity type found\n", NULL);
            }
            if (ret == XML_ERR_ENTITY_LOOP) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                return;
            }
            if (xmlParserEntityCheck(ctxt, 0))
                return;
        }
        if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
            (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
            /*
             * Entity reference callback comes second, it's somewhat
             * superfluous but a compatibility to historical behaviour
             */
            ctxt->sax->reference(ctxt->userData, ent->name);
        }
        return;
    }

    /*
     * We also check for amplification if entities aren't substituted.
     * They might be expanded later.
     */
    if ((was_checked != 0) &&
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
        return;

    /*
     * The entity has children but entities are not substituted: report
     * the reference through the SAX reference() callback instead of
     * inlining the content.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
        (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
        /*
         * Create a node.
         */
        ctxt->sax->reference(ctxt->userData, ent->name);
        return;
    }

    if (ctxt->replaceEntities) {
        /*
         * There is a problem on the handling of _private for entities
         * (bug 155816): Should we copy the content of the field from
         * the entity (possibly overwriting some value set by the user
         * when a copy is created), should we leave it alone, or should
         * we try to take care of different situations?  The problem
         * is exacerbated by the usage of this field by the xmlReader.
         * To fix this bug, we look at _private on the created node
         * and, if it's NULL, we copy in whatever was in the entity.
         * If it's not NULL we leave it alone.  This is somewhat of a
         * hack - maybe we should have further tests to determine
         * what to do.
         */
        if (ctxt->node != NULL) {
            /*
             * Seems we are generating the DOM content, do
             * a simple tree copy for all references except the first
             * In the first occurrence list contains the replacement.
             */
            if (((list == NULL) && (ent->owner == 0)) ||
                (ctxt->parseMode == XML_PARSE_READER)) {
                xmlNodePtr nw = NULL, cur, firstChild = NULL;

                /*
                 * when operating on a reader, the entities definitions
                 * are always owning the entities subtree.
                 * (disabled code, kept for reference:)
                if (ctxt->parseMode == XML_PARSE_READER)
                        ent->owner = 1;
                 */

                /*
                 * Append a deep copy of each entity child to the current
                 * node; the entity's own children are left in place.
                 */
                cur = ent->children;
                while (cur != NULL) {
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = nw;
                        }
                        nw = xmlAddChild(ctxt->node, nw);
                    }
                    if (cur == ent->last) {
                        /*
                         * needed to detect some strange empty
                         * node cases in the reader tests
                         */
                        if ((ctxt->parseMode == XML_PARSE_READER) &&
                            (nw != NULL) &&
                            (nw->type == XML_ELEMENT_NODE) &&
                            (nw->children == NULL))
                            nw->extra = 1;

                        break;
                    }
                    cur = cur->next;
                }
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                    xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else if ((list == NULL) || (ctxt->inputNr > 0)) {
                xmlNodePtr nw = NULL, cur, next, last,
                           firstChild = NULL;

                /*
                 * Copy the entity child list and make it the new
                 * entity child list. The goal is to make sure any
                 * ID or REF referenced will be the one from the
                 * document content and not the entity copy.
                 */
                cur = ent->children;
                ent->children = NULL;
                last = ent->last;
                ent->last = NULL;
                while (cur != NULL) {
                    /* Detach the original node before moving it. */
                    next = cur->next;
                    cur->next = NULL;
                    cur->parent = NULL;
                    nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
                    if (nw != NULL) {
                        if (nw->_private == NULL)
                            nw->_private = cur->_private;
                        if (firstChild == NULL){
                            firstChild = cur;
                        }
                        /* The copy goes to the entity... */
                        xmlAddChild((xmlNodePtr) ent, nw);
                    }
                    /* ...and the original node goes into the document. */
                    xmlAddChild(ctxt->node, cur);
                    if (cur == last)
                        break;
                    cur = next;
                }
                /* The entity now holds copies of its own. */
                if (ent->owner == 0)
                    ent->owner = 1;
#ifdef LIBXML_LEGACY_ENABLED
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                    xmlAddEntityReference(ent, firstChild, nw);
#endif /* LIBXML_LEGACY_ENABLED */
            } else {
                const xmlChar *nbktext;

                /*
                 * the name change is to avoid coalescing of the
                 * node with a possible previous text one which
                 * would make ent->children a dangling pointer
                 */
                nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
                                        -1);
                if (ent->children->type == XML_TEXT_NODE)
                    ent->children->name = nbktext;
                if ((ent->last != ent->children) &&
                    (ent->last->type == XML_TEXT_NODE))
                    ent->last->name = nbktext;
                /* Link the freshly parsed nodes directly into the tree. */
                xmlAddChildList(ctxt->node, ent->children);
            }

            /*
             * This is to avoid a nasty side effect, see
             * characters() in SAX.c
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;
            return;
        }
    }
}
7648
7649 /**
7650 * xmlParseEntityRef:
7651 * @ctxt: an XML parser context
7652 *
7653 * DEPRECATED: Internal function, don't use.
7654 *
7655 * Parse an entity reference. Always consumes '&'.
7656 *
7657 * [68] EntityRef ::= '&' Name ';'
7658 *
7659 * [ WFC: Entity Declared ]
7660 * In a document without any DTD, a document with only an internal DTD
7661 * subset which contains no parameter entity references, or a document
7662 * with "standalone='yes'", the Name given in the entity reference
7663 * must match that in an entity declaration, except that well-formed
7664 * documents need not declare any of the following entities: amp, lt,
7665 * gt, apos, quot. The declaration of a parameter entity must precede
7666 * any reference to it. Similarly, the declaration of a general entity
7667 * must precede any reference to it which appears in a default value in an
7668 * attribute-list declaration. Note that if entities are declared in the
7669 * external subset or in external parameter entities, a non-validating
7670 * processor is not obligated to read and process their declarations;
7671 * for such documents, the rule that an entity must be declared is a
7672 * well-formedness constraint only if standalone='yes'.
7673 *
7674 * [ WFC: Parsed Entity ]
7675 * An entity reference must not contain the name of an unparsed entity
7676 *
7677 * Returns the xmlEntityPtr if found, or NULL otherwise.
7678 */
7679 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7680 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7681 const xmlChar *name;
7682 xmlEntityPtr ent = NULL;
7683
7684 GROW;
7685 if (ctxt->instate == XML_PARSER_EOF)
7686 return(NULL);
7687
7688 if (RAW != '&')
7689 return(NULL);
7690 NEXT;
7691 name = xmlParseName(ctxt);
7692 if (name == NULL) {
7693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7694 "xmlParseEntityRef: no name\n");
7695 return(NULL);
7696 }
7697 if (RAW != ';') {
7698 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7699 return(NULL);
7700 }
7701 NEXT;
7702
7703 /*
7704 * Predefined entities override any extra definition
7705 */
7706 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7707 ent = xmlGetPredefinedEntity(name);
7708 if (ent != NULL)
7709 return(ent);
7710 }
7711
7712 /*
7713 * Ask first SAX for entity resolution, otherwise try the
7714 * entities which may have stored in the parser context.
7715 */
7716 if (ctxt->sax != NULL) {
7717 if (ctxt->sax->getEntity != NULL)
7718 ent = ctxt->sax->getEntity(ctxt->userData, name);
7719 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7720 (ctxt->options & XML_PARSE_OLDSAX))
7721 ent = xmlGetPredefinedEntity(name);
7722 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7723 (ctxt->userData==ctxt)) {
7724 ent = xmlSAX2GetEntity(ctxt, name);
7725 }
7726 }
7727 if (ctxt->instate == XML_PARSER_EOF)
7728 return(NULL);
7729 /*
7730 * [ WFC: Entity Declared ]
7731 * In a document without any DTD, a document with only an
7732 * internal DTD subset which contains no parameter entity
7733 * references, or a document with "standalone='yes'", the
7734 * Name given in the entity reference must match that in an
7735 * entity declaration, except that well-formed documents
7736 * need not declare any of the following entities: amp, lt,
7737 * gt, apos, quot.
7738 * The declaration of a parameter entity must precede any
7739 * reference to it.
7740 * Similarly, the declaration of a general entity must
7741 * precede any reference to it which appears in a default
7742 * value in an attribute-list declaration. Note that if
7743 * entities are declared in the external subset or in
7744 * external parameter entities, a non-validating processor
7745 * is not obligated to read and process their declarations;
7746 * for such documents, the rule that an entity must be
7747 * declared is a well-formedness constraint only if
7748 * standalone='yes'.
7749 */
7750 if (ent == NULL) {
7751 if ((ctxt->standalone == 1) ||
7752 ((ctxt->hasExternalSubset == 0) &&
7753 (ctxt->hasPErefs == 0))) {
7754 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7755 "Entity '%s' not defined\n", name);
7756 } else {
7757 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7758 "Entity '%s' not defined\n", name);
7759 if ((ctxt->inSubset == 0) &&
7760 (ctxt->sax != NULL) &&
7761 (ctxt->disableSAX == 0) &&
7762 (ctxt->sax->reference != NULL)) {
7763 ctxt->sax->reference(ctxt->userData, name);
7764 }
7765 }
7766 ctxt->valid = 0;
7767 }
7768
7769 /*
7770 * [ WFC: Parsed Entity ]
7771 * An entity reference must not contain the name of an
7772 * unparsed entity
7773 */
7774 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7775 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7776 "Entity reference to unparsed entity %s\n", name);
7777 }
7778
7779 /*
7780 * [ WFC: No External Entity References ]
7781 * Attribute values cannot contain direct or indirect
7782 * entity references to external entities.
7783 */
7784 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7785 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7786 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7787 "Attribute references external entity '%s'\n", name);
7788 }
7789 /*
7790 * [ WFC: No < in Attribute Values ]
7791 * The replacement text of any entity referred to directly or
7792 * indirectly in an attribute value (other than "<") must
7793 * not contain a <.
7794 */
7795 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7796 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7797 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7798 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7799 ent->flags |= XML_ENT_CONTAINS_LT;
7800 ent->flags |= XML_ENT_CHECKED_LT;
7801 }
7802 if (ent->flags & XML_ENT_CONTAINS_LT)
7803 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7804 "'<' in entity '%s' is not allowed in attributes "
7805 "values\n", name);
7806 }
7807
7808 /*
7809 * Internal check, no parameter entities here ...
7810 */
7811 else {
7812 switch (ent->etype) {
7813 case XML_INTERNAL_PARAMETER_ENTITY:
7814 case XML_EXTERNAL_PARAMETER_ENTITY:
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7816 "Attempt to reference the parameter entity '%s'\n",
7817 name);
7818 break;
7819 default:
7820 break;
7821 }
7822 }
7823
7824 /*
7825 * [ WFC: No Recursion ]
7826 * A parsed entity must not contain a recursive reference
7827 * to itself, either directly or indirectly.
7828 * Done somewhere else
7829 */
7830 return(ent);
7831 }
7832
7833 /**
7834 * xmlParseStringEntityRef:
7835 * @ctxt: an XML parser context
7836 * @str: a pointer to an index in the string
7837 *
7838 * parse ENTITY references declarations, but this version parses it from
7839 * a string value.
7840 *
7841 * [68] EntityRef ::= '&' Name ';'
7842 *
7843 * [ WFC: Entity Declared ]
7844 * In a document without any DTD, a document with only an internal DTD
7845 * subset which contains no parameter entity references, or a document
7846 * with "standalone='yes'", the Name given in the entity reference
7847 * must match that in an entity declaration, except that well-formed
7848 * documents need not declare any of the following entities: amp, lt,
7849 * gt, apos, quot. The declaration of a parameter entity must precede
7850 * any reference to it. Similarly, the declaration of a general entity
7851 * must precede any reference to it which appears in a default value in an
7852 * attribute-list declaration. Note that if entities are declared in the
7853 * external subset or in external parameter entities, a non-validating
7854 * processor is not obligated to read and process their declarations;
7855 * for such documents, the rule that an entity must be declared is a
7856 * well-formedness constraint only if standalone='yes'.
7857 *
7858 * [ WFC: Parsed Entity ]
7859 * An entity reference must not contain the name of an unparsed entity
7860 *
7861 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7862 * is updated to the current location in the string.
7863 */
7864 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7865 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7866 xmlChar *name;
7867 const xmlChar *ptr;
7868 xmlChar cur;
7869 xmlEntityPtr ent = NULL;
7870
7871 if ((str == NULL) || (*str == NULL))
7872 return(NULL);
7873 ptr = *str;
7874 cur = *ptr;
7875 if (cur != '&')
7876 return(NULL);
7877
7878 ptr++;
7879 name = xmlParseStringName(ctxt, &ptr);
7880 if (name == NULL) {
7881 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7882 "xmlParseStringEntityRef: no name\n");
7883 *str = ptr;
7884 return(NULL);
7885 }
7886 if (*ptr != ';') {
7887 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7888 xmlFree(name);
7889 *str = ptr;
7890 return(NULL);
7891 }
7892 ptr++;
7893
7894
7895 /*
7896 * Predefined entities override any extra definition
7897 */
7898 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7899 ent = xmlGetPredefinedEntity(name);
7900 if (ent != NULL) {
7901 xmlFree(name);
7902 *str = ptr;
7903 return(ent);
7904 }
7905 }
7906
7907 /*
7908 * Ask first SAX for entity resolution, otherwise try the
7909 * entities which may have stored in the parser context.
7910 */
7911 if (ctxt->sax != NULL) {
7912 if (ctxt->sax->getEntity != NULL)
7913 ent = ctxt->sax->getEntity(ctxt->userData, name);
7914 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7915 ent = xmlGetPredefinedEntity(name);
7916 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7917 ent = xmlSAX2GetEntity(ctxt, name);
7918 }
7919 }
7920 if (ctxt->instate == XML_PARSER_EOF) {
7921 xmlFree(name);
7922 return(NULL);
7923 }
7924
7925 /*
7926 * [ WFC: Entity Declared ]
7927 * In a document without any DTD, a document with only an
7928 * internal DTD subset which contains no parameter entity
7929 * references, or a document with "standalone='yes'", the
7930 * Name given in the entity reference must match that in an
7931 * entity declaration, except that well-formed documents
7932 * need not declare any of the following entities: amp, lt,
7933 * gt, apos, quot.
7934 * The declaration of a parameter entity must precede any
7935 * reference to it.
7936 * Similarly, the declaration of a general entity must
7937 * precede any reference to it which appears in a default
7938 * value in an attribute-list declaration. Note that if
7939 * entities are declared in the external subset or in
7940 * external parameter entities, a non-validating processor
7941 * is not obligated to read and process their declarations;
7942 * for such documents, the rule that an entity must be
7943 * declared is a well-formedness constraint only if
7944 * standalone='yes'.
7945 */
7946 if (ent == NULL) {
7947 if ((ctxt->standalone == 1) ||
7948 ((ctxt->hasExternalSubset == 0) &&
7949 (ctxt->hasPErefs == 0))) {
7950 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7951 "Entity '%s' not defined\n", name);
7952 } else {
7953 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7954 "Entity '%s' not defined\n",
7955 name);
7956 }
7957 /* TODO ? check regressions ctxt->valid = 0; */
7958 }
7959
7960 /*
7961 * [ WFC: Parsed Entity ]
7962 * An entity reference must not contain the name of an
7963 * unparsed entity
7964 */
7965 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7967 "Entity reference to unparsed entity %s\n", name);
7968 }
7969
7970 /*
7971 * [ WFC: No External Entity References ]
7972 * Attribute values cannot contain direct or indirect
7973 * entity references to external entities.
7974 */
7975 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7976 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7977 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7978 "Attribute references external entity '%s'\n", name);
7979 }
7980 /*
7981 * [ WFC: No < in Attribute Values ]
7982 * The replacement text of any entity referred to directly or
7983 * indirectly in an attribute value (other than "<") must
7984 * not contain a <.
7985 */
7986 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7987 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7988 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7989 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7990 ent->flags |= XML_ENT_CONTAINS_LT;
7991 ent->flags |= XML_ENT_CHECKED_LT;
7992 }
7993 if (ent->flags & XML_ENT_CONTAINS_LT)
7994 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7995 "'<' in entity '%s' is not allowed in attributes "
7996 "values\n", name);
7997 }
7998
7999 /*
8000 * Internal check, no parameter entities here ...
8001 */
8002 else {
8003 switch (ent->etype) {
8004 case XML_INTERNAL_PARAMETER_ENTITY:
8005 case XML_EXTERNAL_PARAMETER_ENTITY:
8006 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8007 "Attempt to reference the parameter entity '%s'\n",
8008 name);
8009 break;
8010 default:
8011 break;
8012 }
8013 }
8014
8015 /*
8016 * [ WFC: No Recursion ]
8017 * A parsed entity must not contain a recursive reference
8018 * to itself, either directly or indirectly.
8019 * Done somewhere else
8020 */
8021
8022 xmlFree(name);
8023 *str = ptr;
8024 return(ent);
8025 }
8026
8027 /**
8028 * xmlParsePEReference:
8029 * @ctxt: an XML parser context
8030 *
8031 * DEPRECATED: Internal function, don't use.
8032 *
8033 * Parse a parameter entity reference. Always consumes '%'.
8034 *
8035 * The entity content is handled directly by pushing it's content as
8036 * a new input stream.
8037 *
8038 * [69] PEReference ::= '%' Name ';'
8039 *
8040 * [ WFC: No Recursion ]
8041 * A parsed entity must not contain a recursive
8042 * reference to itself, either directly or indirectly.
8043 *
8044 * [ WFC: Entity Declared ]
8045 * In a document without any DTD, a document with only an internal DTD
8046 * subset which contains no parameter entity references, or a document
8047 * with "standalone='yes'", ... ... The declaration of a parameter
8048 * entity must precede any reference to it...
8049 *
8050 * [ VC: Entity Declared ]
8051 * In a document with an external subset or external parameter entities
8052 * with "standalone='no'", ... ... The declaration of a parameter entity
8053 * must precede any reference to it...
8054 *
8055 * [ WFC: In DTD ]
8056 * Parameter-entity references may only appear in the DTD.
8057 * NOTE: misleading but this is handled.
8058 */
8059 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)8060 xmlParsePEReference(xmlParserCtxtPtr ctxt)
8061 {
8062 const xmlChar *name;
8063 xmlEntityPtr entity = NULL;
8064 xmlParserInputPtr input;
8065
8066 if (RAW != '%')
8067 return;
8068 NEXT;
8069 name = xmlParseName(ctxt);
8070 if (name == NULL) {
8071 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8072 return;
8073 }
8074 if (xmlParserDebugEntities)
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PEReference: %s\n", name);
8077 if (RAW != ';') {
8078 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8079 return;
8080 }
8081
8082 NEXT;
8083
8084 /*
8085 * Request the entity from SAX
8086 */
8087 if ((ctxt->sax != NULL) &&
8088 (ctxt->sax->getParameterEntity != NULL))
8089 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8090 if (ctxt->instate == XML_PARSER_EOF)
8091 return;
8092 if (entity == NULL) {
8093 /*
8094 * [ WFC: Entity Declared ]
8095 * In a document without any DTD, a document with only an
8096 * internal DTD subset which contains no parameter entity
8097 * references, or a document with "standalone='yes'", ...
8098 * ... The declaration of a parameter entity must precede
8099 * any reference to it...
8100 */
8101 if ((ctxt->standalone == 1) ||
8102 ((ctxt->hasExternalSubset == 0) &&
8103 (ctxt->hasPErefs == 0))) {
8104 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8105 "PEReference: %%%s; not found\n",
8106 name);
8107 } else {
8108 /*
8109 * [ VC: Entity Declared ]
8110 * In a document with an external subset or external
8111 * parameter entities with "standalone='no'", ...
8112 * ... The declaration of a parameter entity must
8113 * precede any reference to it...
8114 */
8115 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8116 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8117 "PEReference: %%%s; not found\n",
8118 name, NULL);
8119 } else
8120 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8121 "PEReference: %%%s; not found\n",
8122 name, NULL);
8123 ctxt->valid = 0;
8124 }
8125 } else {
8126 /*
8127 * Internal checking in case the entity quest barfed
8128 */
8129 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8130 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8131 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8132 "Internal: %%%s; is not a parameter entity\n",
8133 name, NULL);
8134 } else {
8135 unsigned long parentConsumed;
8136 xmlEntityPtr oldEnt;
8137
8138 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8139 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8140 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8141 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8142 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8143 (ctxt->replaceEntities == 0) &&
8144 (ctxt->validate == 0))
8145 return;
8146
8147 if (entity->flags & XML_ENT_EXPANDING) {
8148 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8149 xmlHaltParser(ctxt);
8150 return;
8151 }
8152
8153 /* Must be computed from old input before pushing new input. */
8154 parentConsumed = ctxt->input->parentConsumed;
8155 oldEnt = ctxt->input->entity;
8156 if ((oldEnt == NULL) ||
8157 ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8158 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8159 xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8160 xmlSaturatedAddSizeT(&parentConsumed,
8161 ctxt->input->cur - ctxt->input->base);
8162 }
8163
8164 input = xmlNewEntityInputStream(ctxt, entity);
8165 if (xmlPushInput(ctxt, input) < 0) {
8166 xmlFreeInputStream(input);
8167 return;
8168 }
8169
8170 entity->flags |= XML_ENT_EXPANDING;
8171
8172 input->parentConsumed = parentConsumed;
8173
8174 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8175 xmlDetectEncoding(ctxt);
8176
8177 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8178 (IS_BLANK_CH(NXT(5)))) {
8179 xmlParseTextDecl(ctxt);
8180 }
8181 }
8182 }
8183 }
8184 ctxt->hasPErefs = 1;
8185 }
8186
8187 /**
8188 * xmlLoadEntityContent:
8189 * @ctxt: an XML parser context
8190 * @entity: an unloaded system entity
8191 *
8192 * Load the original content of the given system entity from the
8193 * ExternalID/SystemID given. This is to be used for Included in Literal
8194 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8195 *
8196 * Returns 0 in case of success and -1 in case of failure
8197 */
8198 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)8199 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8200 xmlParserInputPtr oldinput, input = NULL;
8201 xmlParserInputPtr *oldinputTab;
8202 const xmlChar *oldencoding;
8203 xmlChar *content = NULL;
8204 size_t length, i;
8205 int oldinputNr, oldinputMax, oldprogressive;
8206 int ret = -1;
8207 int res;
8208
8209 if ((ctxt == NULL) || (entity == NULL) ||
8210 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8211 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8212 (entity->content != NULL)) {
8213 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8214 "xmlLoadEntityContent parameter error");
8215 return(-1);
8216 }
8217
8218 if (xmlParserDebugEntities)
8219 xmlGenericError(xmlGenericErrorContext,
8220 "Reading %s entity content input\n", entity->name);
8221
8222 input = xmlLoadExternalEntity((char *) entity->URI,
8223 (char *) entity->ExternalID, ctxt);
8224 if (input == NULL) {
8225 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8226 "xmlLoadEntityContent input error");
8227 return(-1);
8228 }
8229
8230 oldinput = ctxt->input;
8231 oldinputNr = ctxt->inputNr;
8232 oldinputMax = ctxt->inputMax;
8233 oldinputTab = ctxt->inputTab;
8234 oldencoding = ctxt->encoding;
8235 oldprogressive = ctxt->progressive;
8236
8237 ctxt->input = NULL;
8238 ctxt->inputNr = 0;
8239 ctxt->inputMax = 1;
8240 ctxt->encoding = NULL;
8241 ctxt->progressive = 0;
8242 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
8243 if (ctxt->inputTab == NULL) {
8244 xmlErrMemory(ctxt, NULL);
8245 xmlFreeInputStream(input);
8246 goto error;
8247 }
8248
8249 xmlBufResetInput(input->buf->buffer, input);
8250
8251 inputPush(ctxt, input);
8252
8253 xmlDetectEncoding(ctxt);
8254
8255 /*
8256 * Parse a possible text declaration first
8257 */
8258 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
8259 xmlParseTextDecl(ctxt);
8260 /*
8261 * An XML-1.0 document can't reference an entity not XML-1.0
8262 */
8263 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
8264 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
8265 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
8266 "Version mismatch between document and entity\n");
8267 }
8268 }
8269
8270 if (ctxt->instate == XML_PARSER_EOF)
8271 goto error;
8272
8273 length = input->cur - input->base;
8274 xmlBufShrink(input->buf->buffer, length);
8275 xmlSaturatedAdd(&ctxt->sizeentities, length);
8276
8277 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8278 ;
8279
8280 xmlBufResetInput(input->buf->buffer, input);
8281
8282 if (res < 0) {
8283 xmlFatalErr(ctxt, input->buf->error, NULL);
8284 goto error;
8285 }
8286
8287 length = xmlBufUse(input->buf->buffer);
8288 content = xmlBufDetach(input->buf->buffer);
8289
8290 if (length > INT_MAX) {
8291 xmlErrMemory(ctxt, NULL);
8292 goto error;
8293 }
8294
8295 for (i = 0; i < length; ) {
8296 int clen = length - i;
8297 int c = xmlGetUTF8Char(content + i, &clen);
8298
8299 if ((c < 0) || (!IS_CHAR(c))) {
8300 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8301 "xmlLoadEntityContent: invalid char value %d\n",
8302 content[i]);
8303 goto error;
8304 }
8305 i += clen;
8306 }
8307
8308 xmlSaturatedAdd(&ctxt->sizeentities, length);
8309 entity->content = content;
8310 entity->length = length;
8311 content = NULL;
8312 ret = 0;
8313
8314 error:
8315 while (ctxt->inputNr > 0)
8316 xmlFreeInputStream(inputPop(ctxt));
8317 xmlFree(ctxt->inputTab);
8318 xmlFree((xmlChar *) ctxt->encoding);
8319
8320 ctxt->input = oldinput;
8321 ctxt->inputNr = oldinputNr;
8322 ctxt->inputMax = oldinputMax;
8323 ctxt->inputTab = oldinputTab;
8324 ctxt->encoding = oldencoding;
8325 ctxt->progressive = oldprogressive;
8326
8327 xmlFree(content);
8328
8329 return(ret);
8330 }
8331
8332 /**
8333 * xmlParseStringPEReference:
8334 * @ctxt: an XML parser context
8335 * @str: a pointer to an index in the string
8336 *
8337 * parse PEReference declarations
8338 *
8339 * [69] PEReference ::= '%' Name ';'
8340 *
8341 * [ WFC: No Recursion ]
8342 * A parsed entity must not contain a recursive
8343 * reference to itself, either directly or indirectly.
8344 *
8345 * [ WFC: Entity Declared ]
8346 * In a document without any DTD, a document with only an internal DTD
8347 * subset which contains no parameter entity references, or a document
8348 * with "standalone='yes'", ... ... The declaration of a parameter
8349 * entity must precede any reference to it...
8350 *
8351 * [ VC: Entity Declared ]
8352 * In a document with an external subset or external parameter entities
8353 * with "standalone='no'", ... ... The declaration of a parameter entity
8354 * must precede any reference to it...
8355 *
8356 * [ WFC: In DTD ]
8357 * Parameter-entity references may only appear in the DTD.
8358 * NOTE: misleading but this is handled.
8359 *
8360 * Returns the string of the entity content.
8361 * str is updated to the current value of the index
8362 */
8363 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8364 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8365 const xmlChar *ptr;
8366 xmlChar cur;
8367 xmlChar *name;
8368 xmlEntityPtr entity = NULL;
8369
8370 if ((str == NULL) || (*str == NULL)) return(NULL);
8371 ptr = *str;
8372 cur = *ptr;
8373 if (cur != '%')
8374 return(NULL);
8375 ptr++;
8376 name = xmlParseStringName(ctxt, &ptr);
8377 if (name == NULL) {
8378 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8379 "xmlParseStringPEReference: no name\n");
8380 *str = ptr;
8381 return(NULL);
8382 }
8383 cur = *ptr;
8384 if (cur != ';') {
8385 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8386 xmlFree(name);
8387 *str = ptr;
8388 return(NULL);
8389 }
8390 ptr++;
8391
8392 /*
8393 * Request the entity from SAX
8394 */
8395 if ((ctxt->sax != NULL) &&
8396 (ctxt->sax->getParameterEntity != NULL))
8397 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8398 if (ctxt->instate == XML_PARSER_EOF) {
8399 xmlFree(name);
8400 *str = ptr;
8401 return(NULL);
8402 }
8403 if (entity == NULL) {
8404 /*
8405 * [ WFC: Entity Declared ]
8406 * In a document without any DTD, a document with only an
8407 * internal DTD subset which contains no parameter entity
8408 * references, or a document with "standalone='yes'", ...
8409 * ... The declaration of a parameter entity must precede
8410 * any reference to it...
8411 */
8412 if ((ctxt->standalone == 1) ||
8413 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8415 "PEReference: %%%s; not found\n", name);
8416 } else {
8417 /*
8418 * [ VC: Entity Declared ]
8419 * In a document with an external subset or external
8420 * parameter entities with "standalone='no'", ...
8421 * ... The declaration of a parameter entity must
8422 * precede any reference to it...
8423 */
8424 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8425 "PEReference: %%%s; not found\n",
8426 name, NULL);
8427 ctxt->valid = 0;
8428 }
8429 } else {
8430 /*
8431 * Internal checking in case the entity quest barfed
8432 */
8433 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8434 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8435 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8436 "%%%s; is not a parameter entity\n",
8437 name, NULL);
8438 }
8439 }
8440 ctxt->hasPErefs = 1;
8441 xmlFree(name);
8442 *str = ptr;
8443 return(entity);
8444 }
8445
8446 /**
8447 * xmlParseDocTypeDecl:
8448 * @ctxt: an XML parser context
8449 *
8450 * DEPRECATED: Internal function, don't use.
8451 *
8452 * parse a DOCTYPE declaration
8453 *
8454 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8455 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8456 *
8457 * [ VC: Root Element Type ]
8458 * The Name in the document type declaration must match the element
8459 * type of the root element.
8460 */
8461
8462 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8463 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8464 const xmlChar *name = NULL;
8465 xmlChar *ExternalID = NULL;
8466 xmlChar *URI = NULL;
8467
8468 /*
8469 * We know that '<!DOCTYPE' has been detected.
8470 */
8471 SKIP(9);
8472
8473 SKIP_BLANKS;
8474
8475 /*
8476 * Parse the DOCTYPE name.
8477 */
8478 name = xmlParseName(ctxt);
8479 if (name == NULL) {
8480 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8481 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8482 }
8483 ctxt->intSubName = name;
8484
8485 SKIP_BLANKS;
8486
8487 /*
8488 * Check for SystemID and ExternalID
8489 */
8490 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8491
8492 if ((URI != NULL) || (ExternalID != NULL)) {
8493 ctxt->hasExternalSubset = 1;
8494 }
8495 ctxt->extSubURI = URI;
8496 ctxt->extSubSystem = ExternalID;
8497
8498 SKIP_BLANKS;
8499
8500 /*
8501 * Create and update the internal subset.
8502 */
8503 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8504 (!ctxt->disableSAX))
8505 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8506 if (ctxt->instate == XML_PARSER_EOF)
8507 return;
8508
8509 /*
8510 * Is there any internal subset declarations ?
8511 * they are handled separately in xmlParseInternalSubset()
8512 */
8513 if (RAW == '[')
8514 return;
8515
8516 /*
8517 * We should be at the end of the DOCTYPE declaration.
8518 */
8519 if (RAW != '>') {
8520 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8521 }
8522 NEXT;
8523 }
8524
8525 /**
8526 * xmlParseInternalSubset:
8527 * @ctxt: an XML parser context
8528 *
8529 * parse the internal subset declaration
8530 *
8531 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8532 */
8533
8534 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8535 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8536 /*
8537 * Is there any DTD definition ?
8538 */
8539 if (RAW == '[') {
8540 int baseInputNr = ctxt->inputNr;
8541 ctxt->instate = XML_PARSER_DTD;
8542 NEXT;
8543 /*
8544 * Parse the succession of Markup declarations and
8545 * PEReferences.
8546 * Subsequence (markupdecl | PEReference | S)*
8547 */
8548 SKIP_BLANKS;
8549 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8550 (ctxt->instate != XML_PARSER_EOF)) {
8551
8552 /*
8553 * Conditional sections are allowed from external entities included
8554 * by PE References in the internal subset.
8555 */
8556 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8557 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8558 xmlParseConditionalSections(ctxt);
8559 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8560 xmlParseMarkupDecl(ctxt);
8561 } else if (RAW == '%') {
8562 xmlParsePEReference(ctxt);
8563 } else {
8564 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8565 "xmlParseInternalSubset: error detected in"
8566 " Markup declaration\n");
8567 xmlHaltParser(ctxt);
8568 return;
8569 }
8570 SKIP_BLANKS;
8571 SHRINK;
8572 GROW;
8573 }
8574 if (RAW == ']') {
8575 NEXT;
8576 SKIP_BLANKS;
8577 }
8578 }
8579
8580 /*
8581 * We should be at the end of the DOCTYPE declaration.
8582 */
8583 if (RAW != '>') {
8584 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8585 return;
8586 }
8587 NEXT;
8588 }
8589
8590 #ifdef LIBXML_SAX1_ENABLED
8591 /**
8592 * xmlParseAttribute:
8593 * @ctxt: an XML parser context
8594 * @value: a xmlChar ** used to store the value of the attribute
8595 *
8596 * DEPRECATED: Internal function, don't use.
8597 *
8598 * parse an attribute
8599 *
8600 * [41] Attribute ::= Name Eq AttValue
8601 *
8602 * [ WFC: No External Entity References ]
8603 * Attribute values cannot contain direct or indirect entity references
8604 * to external entities.
8605 *
8606 * [ WFC: No < in Attribute Values ]
8607 * The replacement text of any entity referred to directly or indirectly in
8608 * an attribute value (other than "<") must not contain a <.
8609 *
8610 * [ VC: Attribute Value Type ]
8611 * The attribute must have been declared; the value must be of the type
8612 * declared for it.
8613 *
8614 * [25] Eq ::= S? '=' S?
8615 *
8616 * With namespace:
8617 *
8618 * [NS 11] Attribute ::= QName Eq AttValue
8619 *
8620 * Also the case QName == xmlns:??? is handled independently as a namespace
8621 * definition.
8622 *
8623 * Returns the attribute name, and the value in *value.
8624 */
8625
8626 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8627 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8628 const xmlChar *name;
8629 xmlChar *val;
8630
8631 *value = NULL;
8632 GROW;
8633 name = xmlParseName(ctxt);
8634 if (name == NULL) {
8635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8636 "error parsing attribute name\n");
8637 return(NULL);
8638 }
8639
8640 /*
8641 * read the value
8642 */
8643 SKIP_BLANKS;
8644 if (RAW == '=') {
8645 NEXT;
8646 SKIP_BLANKS;
8647 val = xmlParseAttValue(ctxt);
8648 ctxt->instate = XML_PARSER_CONTENT;
8649 } else {
8650 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8651 "Specification mandates value for attribute %s\n", name);
8652 return(name);
8653 }
8654
8655 /*
8656 * Check that xml:lang conforms to the specification
8657 * No more registered as an error, just generate a warning now
8658 * since this was deprecated in XML second edition
8659 */
8660 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8661 if (!xmlCheckLanguageID(val)) {
8662 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8663 "Malformed value for xml:lang : %s\n",
8664 val, NULL);
8665 }
8666 }
8667
8668 /*
8669 * Check that xml:space conforms to the specification
8670 */
8671 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8672 if (xmlStrEqual(val, BAD_CAST "default"))
8673 *(ctxt->space) = 0;
8674 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8675 *(ctxt->space) = 1;
8676 else {
8677 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8678 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8679 val, NULL);
8680 }
8681 }
8682
8683 *value = val;
8684 return(name);
8685 }
8686
8687 /**
8688 * xmlParseStartTag:
8689 * @ctxt: an XML parser context
8690 *
8691 * DEPRECATED: Internal function, don't use.
8692 *
8693 * Parse a start tag. Always consumes '<'.
8694 *
8695 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8696 *
8697 * [ WFC: Unique Att Spec ]
8698 * No attribute name may appear more than once in the same start-tag or
8699 * empty-element tag.
8700 *
8701 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8702 *
8703 * [ WFC: Unique Att Spec ]
8704 * No attribute name may appear more than once in the same start-tag or
8705 * empty-element tag.
8706 *
8707 * With namespace:
8708 *
8709 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8710 *
8711 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8712 *
8713 * Returns the element name parsed
8714 */
8715
8716 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8717 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8718 const xmlChar *name;
8719 const xmlChar *attname;
8720 xmlChar *attvalue;
8721 const xmlChar **atts = ctxt->atts;
8722 int nbatts = 0;
8723 int maxatts = ctxt->maxatts;
8724 int i;
8725
8726 if (RAW != '<') return(NULL);
8727 NEXT1;
8728
8729 name = xmlParseName(ctxt);
8730 if (name == NULL) {
8731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8732 "xmlParseStartTag: invalid element name\n");
8733 return(NULL);
8734 }
8735
8736 /*
8737 * Now parse the attributes, it ends up with the ending
8738 *
8739 * (S Attribute)* S?
8740 */
8741 SKIP_BLANKS;
8742 GROW;
8743
8744 while (((RAW != '>') &&
8745 ((RAW != '/') || (NXT(1) != '>')) &&
8746 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8747 attname = xmlParseAttribute(ctxt, &attvalue);
8748 if (attname == NULL) {
8749 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8750 "xmlParseStartTag: problem parsing attributes\n");
8751 break;
8752 }
8753 if (attvalue != NULL) {
8754 /*
8755 * [ WFC: Unique Att Spec ]
8756 * No attribute name may appear more than once in the same
8757 * start-tag or empty-element tag.
8758 */
8759 for (i = 0; i < nbatts;i += 2) {
8760 if (xmlStrEqual(atts[i], attname)) {
8761 xmlErrAttributeDup(ctxt, NULL, attname);
8762 xmlFree(attvalue);
8763 goto failed;
8764 }
8765 }
8766 /*
8767 * Add the pair to atts
8768 */
8769 if (atts == NULL) {
8770 maxatts = 22; /* allow for 10 attrs by default */
8771 atts = (const xmlChar **)
8772 xmlMalloc(maxatts * sizeof(xmlChar *));
8773 if (atts == NULL) {
8774 xmlErrMemory(ctxt, NULL);
8775 if (attvalue != NULL)
8776 xmlFree(attvalue);
8777 goto failed;
8778 }
8779 ctxt->atts = atts;
8780 ctxt->maxatts = maxatts;
8781 } else if (nbatts + 4 > maxatts) {
8782 const xmlChar **n;
8783
8784 maxatts *= 2;
8785 n = (const xmlChar **) xmlRealloc((void *) atts,
8786 maxatts * sizeof(const xmlChar *));
8787 if (n == NULL) {
8788 xmlErrMemory(ctxt, NULL);
8789 if (attvalue != NULL)
8790 xmlFree(attvalue);
8791 goto failed;
8792 }
8793 atts = n;
8794 ctxt->atts = atts;
8795 ctxt->maxatts = maxatts;
8796 }
8797 atts[nbatts++] = attname;
8798 atts[nbatts++] = attvalue;
8799 atts[nbatts] = NULL;
8800 atts[nbatts + 1] = NULL;
8801 } else {
8802 if (attvalue != NULL)
8803 xmlFree(attvalue);
8804 }
8805
8806 failed:
8807
8808 GROW
8809 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8810 break;
8811 if (SKIP_BLANKS == 0) {
8812 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8813 "attributes construct error\n");
8814 }
8815 SHRINK;
8816 GROW;
8817 }
8818
8819 /*
8820 * SAX: Start of Element !
8821 */
8822 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8823 (!ctxt->disableSAX)) {
8824 if (nbatts > 0)
8825 ctxt->sax->startElement(ctxt->userData, name, atts);
8826 else
8827 ctxt->sax->startElement(ctxt->userData, name, NULL);
8828 }
8829
8830 if (atts != NULL) {
8831 /* Free only the content strings */
8832 for (i = 1;i < nbatts;i+=2)
8833 if (atts[i] != NULL)
8834 xmlFree((xmlChar *) atts[i]);
8835 }
8836 return(name);
8837 }
8838
8839 /**
8840 * xmlParseEndTag1:
8841 * @ctxt: an XML parser context
8842 * @line: line of the start tag
8843 * @nsNr: number of namespaces on the start tag
8844 *
8845 * Parse an end tag. Always consumes '</'.
8846 *
8847 * [42] ETag ::= '</' Name S? '>'
8848 *
8849 * With namespace
8850 *
8851 * [NS 9] ETag ::= '</' QName S? '>'
8852 */
8853
8854 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8855 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8856 const xmlChar *name;
8857
8858 GROW;
8859 if ((RAW != '<') || (NXT(1) != '/')) {
8860 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8861 "xmlParseEndTag: '</' not found\n");
8862 return;
8863 }
8864 SKIP(2);
8865
8866 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8867
8868 /*
8869 * We should definitely be at the ending "S? '>'" part
8870 */
8871 GROW;
8872 SKIP_BLANKS;
8873 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8874 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8875 } else
8876 NEXT1;
8877
8878 /*
8879 * [ WFC: Element Type Match ]
8880 * The Name in an element's end-tag must match the element type in the
8881 * start-tag.
8882 *
8883 */
8884 if (name != (xmlChar*)1) {
8885 if (name == NULL) name = BAD_CAST "unparsable";
8886 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8887 "Opening and ending tag mismatch: %s line %d and %s\n",
8888 ctxt->name, line, name);
8889 }
8890
8891 /*
8892 * SAX: End of Tag
8893 */
8894 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8895 (!ctxt->disableSAX))
8896 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8897
8898 namePop(ctxt);
8899 spacePop(ctxt);
8900 return;
8901 }
8902
8903 /**
8904 * xmlParseEndTag:
8905 * @ctxt: an XML parser context
8906 *
8907 * DEPRECATED: Internal function, don't use.
8908 *
8909 * parse an end of tag
8910 *
8911 * [42] ETag ::= '</' Name S? '>'
8912 *
8913 * With namespace
8914 *
8915 * [NS 9] ETag ::= '</' QName S? '>'
8916 */
8917
8918 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8919 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8920 xmlParseEndTag1(ctxt, 0);
8921 }
8922 #endif /* LIBXML_SAX1_ENABLED */
8923
8924 /************************************************************************
8925 * *
8926 * SAX 2 specific operations *
8927 * *
8928 ************************************************************************/
8929
8930 /**
8931 * xmlParseQNameHashed:
8932 * @ctxt: an XML parser context
8933 * @prefix: pointer to store the prefix part
8934 *
8935 * parse an XML Namespace QName
8936 *
8937 * [6] QName ::= (Prefix ':')? LocalPart
8938 * [7] Prefix ::= NCName
8939 * [8] LocalPart ::= NCName
8940 *
8941 * Returns the Name parsed or NULL
8942 */
8943
8944 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8945 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8946 xmlHashedString l, p;
8947 int start;
8948
8949 l.name = NULL;
8950 p.name = NULL;
8951
8952 GROW;
8953 if (ctxt->instate == XML_PARSER_EOF)
8954 return(l);
8955 start = CUR_PTR - BASE_PTR;
8956
8957 l = xmlParseNCName(ctxt);
8958 if ((l.name != NULL) && (CUR == ':')) {
8959 NEXT;
8960 p = l;
8961 l = xmlParseNCName(ctxt);
8962 }
8963 if ((l.name == NULL) || (CUR == ':')) {
8964 xmlChar *tmp;
8965
8966 l.name = NULL;
8967 p.name = NULL;
8968 if (ctxt->instate == XML_PARSER_EOF)
8969 return(l);
8970 if ((CUR != ':') && (CUR_PTR <= BASE_PTR + start))
8971 return(l);
8972 tmp = xmlParseNmtoken(ctxt);
8973 if (tmp != NULL)
8974 xmlFree(tmp);
8975 if (ctxt->instate == XML_PARSER_EOF)
8976 return(l);
8977 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8978 CUR_PTR - (BASE_PTR + start));
8979 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8980 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8981 }
8982
8983 *prefix = p;
8984 return(l);
8985 }
8986
8987 /**
8988 * xmlParseQName:
8989 * @ctxt: an XML parser context
8990 * @prefix: pointer to store the prefix part
8991 *
8992 * parse an XML Namespace QName
8993 *
8994 * [6] QName ::= (Prefix ':')? LocalPart
8995 * [7] Prefix ::= NCName
8996 * [8] LocalPart ::= NCName
8997 *
8998 * Returns the Name parsed or NULL
8999 */
9000
9001 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)9002 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
9003 xmlHashedString n, p;
9004
9005 n = xmlParseQNameHashed(ctxt, &p);
9006 if (n.name == NULL)
9007 return(NULL);
9008 *prefix = p.name;
9009 return(n.name);
9010 }
9011
9012 /**
9013 * xmlParseQNameAndCompare:
9014 * @ctxt: an XML parser context
9015 * @name: the localname
9016 * @prefix: the prefix, if any.
9017 *
9018 * parse an XML name and compares for match
9019 * (specialized for endtag parsing)
9020 *
9021 * Returns NULL for an illegal name, (xmlChar*) 1 for success
9022 * and the name for mismatch
9023 */
9024
9025 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)9026 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
9027 xmlChar const *prefix) {
9028 const xmlChar *cmp;
9029 const xmlChar *in;
9030 const xmlChar *ret;
9031 const xmlChar *prefix2;
9032
9033 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
9034
9035 GROW;
9036 in = ctxt->input->cur;
9037
9038 cmp = prefix;
9039 while (*in != 0 && *in == *cmp) {
9040 ++in;
9041 ++cmp;
9042 }
9043 if ((*cmp == 0) && (*in == ':')) {
9044 in++;
9045 cmp = name;
9046 while (*in != 0 && *in == *cmp) {
9047 ++in;
9048 ++cmp;
9049 }
9050 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9051 /* success */
9052 ctxt->input->col += in - ctxt->input->cur;
9053 ctxt->input->cur = in;
9054 return((const xmlChar*) 1);
9055 }
9056 }
9057 /*
9058 * all strings coms from the dictionary, equality can be done directly
9059 */
9060 ret = xmlParseQName (ctxt, &prefix2);
9061 if (ret == NULL)
9062 return(NULL);
9063 if ((ret == name) && (prefix == prefix2))
9064 return((const xmlChar*) 1);
9065 return ret;
9066 }
9067
9068 /**
9069 * xmlParseAttValueInternal:
9070 * @ctxt: an XML parser context
9071 * @len: attribute len result
9072 * @alloc: whether the attribute was reallocated as a new string
9073 * @normalize: if 1 then further non-CDATA normalization must be done
9074 *
9075 * parse a value for an attribute.
9076 * NOTE: if no normalization is needed, the routine will return pointers
9077 * directly from the data buffer.
9078 *
9079 * 3.3.3 Attribute-Value Normalization:
9080 * Before the value of an attribute is passed to the application or
9081 * checked for validity, the XML processor must normalize it as follows:
9082 * - a character reference is processed by appending the referenced
9083 * character to the attribute value
9084 * - an entity reference is processed by recursively processing the
9085 * replacement text of the entity
9086 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9087 * appending #x20 to the normalized value, except that only a single
9088 * #x20 is appended for a "#xD#xA" sequence that is part of an external
9089 * parsed entity or the literal entity value of an internal parsed entity
9090 * - other characters are processed by appending them to the normalized value
9091 * If the declared value is not CDATA, then the XML processor must further
9092 * process the normalized attribute value by discarding any leading and
9093 * trailing space (#x20) characters, and by replacing sequences of space
9094 * (#x20) characters by a single space (#x20) character.
9095 * All attributes for which no declaration has been read should be treated
9096 * by a non-validating parser as if declared CDATA.
9097 *
9098 * Returns the AttValue parsed or NULL. The value has to be freed by the
9099 * caller if it was copied, this can be detected by val[*len] == 0.
9100 */
9101
9102 #define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9103 const xmlChar *oldbase = ctxt->input->base;\
9104 GROW;\
9105 if (ctxt->instate == XML_PARSER_EOF)\
9106 return(NULL);\
9107 if (oldbase != ctxt->input->base) {\
9108 ptrdiff_t delta = ctxt->input->base - oldbase;\
9109 start = start + delta;\
9110 in = in + delta;\
9111 }\
9112 end = ctxt->input->end;
9113
9114 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)9115 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9116 int normalize)
9117 {
9118 xmlChar limit = 0;
9119 const xmlChar *in = NULL, *start, *end, *last;
9120 xmlChar *ret = NULL;
9121 int line, col;
9122 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9123 XML_MAX_HUGE_LENGTH :
9124 XML_MAX_TEXT_LENGTH;
9125
9126 GROW;
9127 in = (xmlChar *) CUR_PTR;
9128 line = ctxt->input->line;
9129 col = ctxt->input->col;
9130 if (*in != '"' && *in != '\'') {
9131 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9132 return (NULL);
9133 }
9134 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9135
9136 /*
9137 * try to handle in this routine the most common case where no
9138 * allocation of a new string is required and where content is
9139 * pure ASCII.
9140 */
9141 limit = *in++;
9142 col++;
9143 end = ctxt->input->end;
9144 start = in;
9145 if (in >= end) {
9146 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9147 }
9148 if (normalize) {
9149 /*
9150 * Skip any leading spaces
9151 */
9152 while ((in < end) && (*in != limit) &&
9153 ((*in == 0x20) || (*in == 0x9) ||
9154 (*in == 0xA) || (*in == 0xD))) {
9155 if (*in == 0xA) {
9156 line++; col = 1;
9157 } else {
9158 col++;
9159 }
9160 in++;
9161 start = in;
9162 if (in >= end) {
9163 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9164 if ((in - start) > maxLength) {
9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9166 "AttValue length too long\n");
9167 return(NULL);
9168 }
9169 }
9170 }
9171 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9172 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9173 col++;
9174 if ((*in++ == 0x20) && (*in == 0x20)) break;
9175 if (in >= end) {
9176 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9177 if ((in - start) > maxLength) {
9178 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9179 "AttValue length too long\n");
9180 return(NULL);
9181 }
9182 }
9183 }
9184 last = in;
9185 /*
9186 * skip the trailing blanks
9187 */
9188 while ((last[-1] == 0x20) && (last > start)) last--;
9189 while ((in < end) && (*in != limit) &&
9190 ((*in == 0x20) || (*in == 0x9) ||
9191 (*in == 0xA) || (*in == 0xD))) {
9192 if (*in == 0xA) {
9193 line++, col = 1;
9194 } else {
9195 col++;
9196 }
9197 in++;
9198 if (in >= end) {
9199 const xmlChar *oldbase = ctxt->input->base;
9200 GROW;
9201 if (ctxt->instate == XML_PARSER_EOF)
9202 return(NULL);
9203 if (oldbase != ctxt->input->base) {
9204 ptrdiff_t delta = ctxt->input->base - oldbase;
9205 start = start + delta;
9206 in = in + delta;
9207 last = last + delta;
9208 }
9209 end = ctxt->input->end;
9210 if ((in - start) > maxLength) {
9211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9212 "AttValue length too long\n");
9213 return(NULL);
9214 }
9215 }
9216 }
9217 if ((in - start) > maxLength) {
9218 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9219 "AttValue length too long\n");
9220 return(NULL);
9221 }
9222 if (*in != limit) goto need_complex;
9223 } else {
9224 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9225 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9226 in++;
9227 col++;
9228 if (in >= end) {
9229 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9230 if ((in - start) > maxLength) {
9231 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9232 "AttValue length too long\n");
9233 return(NULL);
9234 }
9235 }
9236 }
9237 last = in;
9238 if ((in - start) > maxLength) {
9239 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9240 "AttValue length too long\n");
9241 return(NULL);
9242 }
9243 if (*in != limit) goto need_complex;
9244 }
9245 in++;
9246 col++;
9247 if (len != NULL) {
9248 if (alloc) *alloc = 0;
9249 *len = last - start;
9250 ret = (xmlChar *) start;
9251 } else {
9252 if (alloc) *alloc = 1;
9253 ret = xmlStrndup(start, last - start);
9254 }
9255 CUR_PTR = in;
9256 ctxt->input->line = line;
9257 ctxt->input->col = col;
9258 return ret;
9259 need_complex:
9260 if (alloc) *alloc = 1;
9261 return xmlParseAttValueComplex(ctxt, len, normalize);
9262 }
9263
9264 /**
9265 * xmlParseAttribute2:
9266 * @ctxt: an XML parser context
9267 * @pref: the element prefix
9268 * @elem: the element name
9269 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9270 * @value: a xmlChar ** used to store the value of the attribute
9271 * @len: an int * to save the length of the attribute
9272 * @alloc: an int * to indicate if the attribute was allocated
9273 *
9274 * parse an attribute in the new SAX2 framework.
9275 *
9276 * Returns the attribute name, and the value in *value, .
9277 */
9278
9279 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)9280 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9281 const xmlChar * pref, const xmlChar * elem,
9282 xmlHashedString * hprefix, xmlChar ** value,
9283 int *len, int *alloc)
9284 {
9285 xmlHashedString hname;
9286 const xmlChar *prefix, *name;
9287 xmlChar *val, *internal_val = NULL;
9288 int normalize = 0;
9289
9290 *value = NULL;
9291 GROW;
9292 hname = xmlParseQNameHashed(ctxt, hprefix);
9293 if (hname.name == NULL) {
9294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295 "error parsing attribute name\n");
9296 return(hname);
9297 }
9298 name = hname.name;
9299 if (hprefix->name != NULL)
9300 prefix = hprefix->name;
9301 else
9302 prefix = NULL;
9303
9304 /*
9305 * get the type if needed
9306 */
9307 if (ctxt->attsSpecial != NULL) {
9308 int type;
9309
9310 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9311 pref, elem,
9312 prefix, name);
9313 if (type != 0)
9314 normalize = 1;
9315 }
9316
9317 /*
9318 * read the value
9319 */
9320 SKIP_BLANKS;
9321 if (RAW == '=') {
9322 NEXT;
9323 SKIP_BLANKS;
9324 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9325 if (val == NULL) {
9326 hname.name = NULL;
9327 return(hname);
9328 }
9329 if (normalize) {
9330 /*
9331 * Sometimes a second normalisation pass for spaces is needed
9332 * but that only happens if charrefs or entities references
9333 * have been used in the attribute value, i.e. the attribute
9334 * value have been extracted in an allocated string already.
9335 */
9336 if (*alloc) {
9337 const xmlChar *val2;
9338
9339 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9340 if ((val2 != NULL) && (val2 != val)) {
9341 xmlFree(val);
9342 val = (xmlChar *) val2;
9343 }
9344 }
9345 }
9346 ctxt->instate = XML_PARSER_CONTENT;
9347 } else {
9348 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9349 "Specification mandates value for attribute %s\n",
9350 name);
9351 return(hname);
9352 }
9353
9354 if (prefix == ctxt->str_xml) {
9355 /*
9356 * Check that xml:lang conforms to the specification
9357 * No more registered as an error, just generate a warning now
9358 * since this was deprecated in XML second edition
9359 */
9360 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9361 internal_val = xmlStrndup(val, *len);
9362 if (!xmlCheckLanguageID(internal_val)) {
9363 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9364 "Malformed value for xml:lang : %s\n",
9365 internal_val, NULL);
9366 }
9367 }
9368
9369 /*
9370 * Check that xml:space conforms to the specification
9371 */
9372 if (xmlStrEqual(name, BAD_CAST "space")) {
9373 internal_val = xmlStrndup(val, *len);
9374 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9375 *(ctxt->space) = 0;
9376 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9377 *(ctxt->space) = 1;
9378 else {
9379 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9380 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9381 internal_val, NULL);
9382 }
9383 }
9384 if (internal_val) {
9385 xmlFree(internal_val);
9386 }
9387 }
9388
9389 *value = val;
9390 return (hname);
9391 }
9392
9393 ATTRIBUTE_NO_SANITIZE_INTEGER
9394 static unsigned
xmlCombineHash(unsigned v1,unsigned v2)9395 xmlCombineHash(unsigned v1, unsigned v2) {
9396 return(HASH_ROL(v1, 15) ^ v2);
9397 }
9398
9399 /**
9400 * xmlAttrHashInsert:
9401 * @ctxt: parser context
9402 * @aindex: attribute index (this is a multiple of 5)
9403 * @sizePtr: size of the hash table (input/output value)
9404 * @name: attribute name
9405 * @uri: namespace uri
9406 * @hashValue: combined hash value of name and uri
9407 *
9408 * Inserts a new attribute into the hash table.
9409 *
9410 * Returns INT_MAX if no existing attribute was found, the attribute
9411 * index if an attribute was found, -1 if a memory allocation failed.
9412 */
9413 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,int aindex,unsigned * sizePtr,const xmlChar * name,const xmlChar * uri,unsigned hashValue)9414 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, int aindex, unsigned *sizePtr,
9415 const xmlChar *name, const xmlChar *uri,
9416 unsigned hashValue) {
9417 xmlAttrHashBucket *table = ctxt->attrHash;
9418 xmlAttrHashBucket *bucket;
9419 unsigned hindex;
9420 unsigned size = *sizePtr;
9421
9422 if (size > 0) {
9423 hindex = hashValue & (size - 1);
9424 bucket = &table[hindex];
9425
9426 while (bucket->hashValue != 0) {
9427 const xmlChar **atts = &ctxt->atts[bucket->index];
9428
9429 if (name == atts[0]) {
9430 int nsIndex = (int) (ptrdiff_t) atts[2];
9431
9432 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
9433 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
9434 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
9435 return(bucket->index);
9436 }
9437
9438 hindex++;
9439 bucket++;
9440 if (hindex >= size) {
9441 hindex = 0;
9442 bucket = table;
9443 }
9444 }
9445 }
9446
9447 /*
9448 * Grow hash table
9449 */
9450 if ((unsigned) aindex / 5 >= size / 2) {
9451 xmlAttrHashBucket *newTable;
9452 unsigned newSize, i, nindex;
9453
9454 newSize = size ? size * 2 : 8;
9455
9456 if (newSize > ctxt->attrHashMax) {
9457 newTable = xmlRealloc(table, newSize * sizeof(newTable[0]));
9458 if (newTable == NULL) {
9459 xmlErrMemory(ctxt, NULL);
9460 return(-1);
9461 }
9462
9463 table = newTable;
9464 ctxt->attrHash = newTable;
9465 ctxt->attrHashMax = newSize;
9466 }
9467
9468 memset(&table[size], 0, (newSize - size) * sizeof(table[0]));
9469
9470 if (size > 0) {
9471 /*
9472 * We must search for the start of a probe sequence to make
9473 * in-place operation work.
9474 */
9475 hindex = 0;
9476 bucket = table;
9477 while (bucket->hashValue != 0) {
9478 hindex++;
9479 bucket++;
9480 }
9481
9482 for (i = 0; i < size; i++) {
9483 if (bucket->hashValue != 0) {
9484 nindex = bucket->hashValue & (newSize - 1);
9485
9486 while (nindex != hindex) {
9487 if (table[nindex].hashValue == 0) {
9488 table[nindex] = *bucket;
9489 bucket->hashValue = 0;
9490 break;
9491 }
9492
9493 nindex++;
9494 if (nindex >= newSize)
9495 nindex = 0;
9496 }
9497 }
9498
9499 hindex++;
9500 bucket++;
9501 if (hindex >= size) {
9502 hindex = 0;
9503 bucket = table;
9504 }
9505 }
9506 }
9507
9508 size = newSize;
9509 *sizePtr = newSize;
9510
9511 /*
9512 * Relookup
9513 */
9514 hindex = hashValue & (size - 1);
9515 bucket = &table[hindex];
9516
9517 while (bucket->hashValue != 0) {
9518 hindex++;
9519 bucket++;
9520 if (hindex >= size) {
9521 hindex = 0;
9522 bucket = table;
9523 }
9524 }
9525 }
9526
9527 bucket->hashValue = hashValue;
9528 bucket->index = aindex;
9529
9530 return(INT_MAX);
9531 }
9532
9533 /**
9534 * xmlParseStartTag2:
9535 * @ctxt: an XML parser context
9536 *
9537 * Parse a start tag. Always consumes '<'.
9538 *
9539 * This routine is called when running SAX2 parsing
9540 *
9541 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9542 *
9543 * [ WFC: Unique Att Spec ]
9544 * No attribute name may appear more than once in the same start-tag or
9545 * empty-element tag.
9546 *
9547 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9548 *
9549 * [ WFC: Unique Att Spec ]
9550 * No attribute name may appear more than once in the same start-tag or
9551 * empty-element tag.
9552 *
9553 * With namespace:
9554 *
9555 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9556 *
9557 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9558 *
9559 * Returns the element name parsed
9560 */
9561
9562 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)9563 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9564 const xmlChar **URI, int *nbNsPtr) {
9565 xmlHashedString hlocalname;
9566 xmlHashedString hprefix;
9567 xmlHashedString hattname;
9568 xmlHashedString haprefix;
9569 const xmlChar *localname;
9570 const xmlChar *prefix;
9571 const xmlChar *attname;
9572 const xmlChar *aprefix;
9573 const xmlChar *uri;
9574 xmlChar *attvalue = NULL;
9575 const xmlChar **atts = ctxt->atts;
9576 unsigned attrHashSize = 0;
9577 int maxatts = ctxt->maxatts;
9578 int nratts, nbatts, nbdef, inputid;
9579 int i, j, nbNs, attval, nsIndex;
9580 int alloc = 0;
9581
9582 if (RAW != '<') return(NULL);
9583 NEXT1;
9584
9585 inputid = ctxt->input->id;
9586 nbatts = 0;
9587 nratts = 0;
9588 nbdef = 0;
9589 nbNs = 0;
9590 attval = 0;
9591
9592 if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9593 xmlErrMemory(ctxt, NULL);
9594 return(NULL);
9595 }
9596
9597 hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9598 if (hlocalname.name == NULL) {
9599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9600 "StartTag: invalid element name\n");
9601 return(NULL);
9602 }
9603 localname = hlocalname.name;
9604 prefix = hprefix.name;
9605
9606 /*
9607 * Now parse the attributes, it ends up with the ending
9608 *
9609 * (S Attribute)* S?
9610 */
9611 SKIP_BLANKS;
9612 GROW;
9613
9614 /*
9615 * The ctxt->atts array will be ultimately passed to the SAX callback
9616 * containing five xmlChar pointers for each attribute:
9617 *
9618 * [0] attribute name
9619 * [1] attribute prefix
9620 * [2] namespace URI
9621 * [3] attribute value
9622 * [4] end of attribute value
9623 *
9624 * To save memory, we reuse this array temporarily and store integers
9625 * in these pointer variables.
9626 *
9627 * [0] attribute name
9628 * [1] attribute prefix
9629 * [2] hash value of attribute prefix, and later namespace index
9630 * [3] for non-allocated values: ptrdiff_t offset into input buffer
9631 * [4] for non-allocated values: ptrdiff_t offset into input buffer
9632 *
9633 * The ctxt->attallocs array contains an additional unsigned int for
9634 * each attribute, containing the hash value of the attribute name
9635 * and the alloc flag in bit 31.
9636 */
9637
9638 while (((RAW != '>') &&
9639 ((RAW != '/') || (NXT(1) != '>')) &&
9640 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9641 int len = -1;
9642
9643 hattname = xmlParseAttribute2(ctxt, prefix, localname,
9644 &haprefix, &attvalue, &len,
9645 &alloc);
9646 if (hattname.name == NULL) {
9647 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9648 "xmlParseStartTag: problem parsing attributes\n");
9649 break;
9650 }
9651 if (attvalue == NULL)
9652 goto next_attr;
9653 attname = hattname.name;
9654 aprefix = haprefix.name;
9655 if (len < 0) len = xmlStrlen(attvalue);
9656
9657 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9658 xmlHashedString huri;
9659 xmlURIPtr parsedUri;
9660
9661 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9662 uri = huri.name;
9663 if (uri == NULL) {
9664 xmlErrMemory(ctxt, NULL);
9665 goto next_attr;
9666 }
9667 if (*uri != 0) {
9668 parsedUri = xmlParseURI((const char *) uri);
9669 if (parsedUri == NULL) {
9670 xmlNsErr(ctxt, XML_WAR_NS_URI,
9671 "xmlns: '%s' is not a valid URI\n",
9672 uri, NULL, NULL);
9673 } else {
9674 if (parsedUri->scheme == NULL) {
9675 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9676 "xmlns: URI %s is not absolute\n",
9677 uri, NULL, NULL);
9678 }
9679 xmlFreeURI(parsedUri);
9680 }
9681 if (uri == ctxt->str_xml_ns) {
9682 if (attname != ctxt->str_xml) {
9683 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9684 "xml namespace URI cannot be the default namespace\n",
9685 NULL, NULL, NULL);
9686 }
9687 goto next_attr;
9688 }
9689 if ((len == 29) &&
9690 (xmlStrEqual(uri,
9691 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9692 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9693 "reuse of the xmlns namespace name is forbidden\n",
9694 NULL, NULL, NULL);
9695 goto next_attr;
9696 }
9697 }
9698
9699 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9700 nbNs++;
9701 } else if (aprefix == ctxt->str_xmlns) {
9702 xmlHashedString huri;
9703 xmlURIPtr parsedUri;
9704
9705 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9706 uri = huri.name;
9707 if (uri == NULL) {
9708 xmlErrMemory(ctxt, NULL);
9709 goto next_attr;
9710 }
9711
9712 if (attname == ctxt->str_xml) {
9713 if (uri != ctxt->str_xml_ns) {
9714 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9715 "xml namespace prefix mapped to wrong URI\n",
9716 NULL, NULL, NULL);
9717 }
9718 /*
9719 * Do not keep a namespace definition node
9720 */
9721 goto next_attr;
9722 }
9723 if (uri == ctxt->str_xml_ns) {
9724 if (attname != ctxt->str_xml) {
9725 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9726 "xml namespace URI mapped to wrong prefix\n",
9727 NULL, NULL, NULL);
9728 }
9729 goto next_attr;
9730 }
9731 if (attname == ctxt->str_xmlns) {
9732 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9733 "redefinition of the xmlns prefix is forbidden\n",
9734 NULL, NULL, NULL);
9735 goto next_attr;
9736 }
9737 if ((len == 29) &&
9738 (xmlStrEqual(uri,
9739 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9740 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9741 "reuse of the xmlns namespace name is forbidden\n",
9742 NULL, NULL, NULL);
9743 goto next_attr;
9744 }
9745 if ((uri == NULL) || (uri[0] == 0)) {
9746 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9747 "xmlns:%s: Empty XML namespace is not allowed\n",
9748 attname, NULL, NULL);
9749 goto next_attr;
9750 } else {
9751 parsedUri = xmlParseURI((const char *) uri);
9752 if (parsedUri == NULL) {
9753 xmlNsErr(ctxt, XML_WAR_NS_URI,
9754 "xmlns:%s: '%s' is not a valid URI\n",
9755 attname, uri, NULL);
9756 } else {
9757 if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9758 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9759 "xmlns:%s: URI %s is not absolute\n",
9760 attname, uri, NULL);
9761 }
9762 xmlFreeURI(parsedUri);
9763 }
9764 }
9765
9766 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9767 nbNs++;
9768 } else {
9769 /*
9770 * Populate attributes array, see above for repurposing
9771 * of xmlChar pointers.
9772 */
9773 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9774 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9775 goto next_attr;
9776 }
9777 maxatts = ctxt->maxatts;
9778 atts = ctxt->atts;
9779 }
9780 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9781 ((unsigned) alloc << 31);
9782 atts[nbatts++] = attname;
9783 atts[nbatts++] = aprefix;
9784 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9785 if (alloc) {
9786 atts[nbatts++] = attvalue;
9787 attvalue += len;
9788 atts[nbatts++] = attvalue;
9789 } else {
9790 /*
9791 * attvalue points into the input buffer which can be
9792 * reallocated. Store differences to input->base instead.
9793 * The pointers will be reconstructed later.
9794 */
9795 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9796 attvalue += len;
9797 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9798 }
9799 /*
9800 * tag if some deallocation is needed
9801 */
9802 if (alloc != 0) attval = 1;
9803 attvalue = NULL; /* moved into atts */
9804 }
9805
9806 next_attr:
9807 if ((attvalue != NULL) && (alloc != 0)) {
9808 xmlFree(attvalue);
9809 attvalue = NULL;
9810 }
9811
9812 GROW
9813 if (ctxt->instate == XML_PARSER_EOF)
9814 break;
9815 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9816 break;
9817 if (SKIP_BLANKS == 0) {
9818 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9819 "attributes construct error\n");
9820 break;
9821 }
9822 GROW;
9823 }
9824
9825 if (ctxt->input->id != inputid) {
9826 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9827 "Unexpected change of input\n");
9828 localname = NULL;
9829 goto done;
9830 }
9831
9832 /*
9833 * Namespaces from default attributes
9834 */
9835 if (ctxt->attsDefault != NULL) {
9836 xmlDefAttrsPtr defaults;
9837
9838 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9839 if (defaults != NULL) {
9840 for (i = 0; i < defaults->nbAttrs; i++) {
9841 xmlDefAttr *attr = &defaults->attrs[i];
9842
9843 attname = attr->name.name;
9844 aprefix = attr->prefix.name;
9845
9846 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9847 xmlParserEntityCheck(ctxt, attr->expandedSize);
9848
9849 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9850 nbNs++;
9851 } else if (aprefix == ctxt->str_xmlns) {
9852 xmlParserEntityCheck(ctxt, attr->expandedSize);
9853
9854 if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9855 NULL, 1) > 0)
9856 nbNs++;
9857 }
9858 }
9859 }
9860 }
9861
9862 /*
9863 * Resolve attribute namespaces
9864 */
9865 for (i = 0; i < nbatts; i += 5) {
9866 attname = atts[i];
9867 aprefix = atts[i+1];
9868
9869 /*
9870 * The default namespace does not apply to attribute names.
9871 */
9872 if (aprefix == NULL) {
9873 nsIndex = NS_INDEX_EMPTY;
9874 } else if (aprefix == ctxt->str_xml) {
9875 nsIndex = NS_INDEX_XML;
9876 } else {
9877 haprefix.name = aprefix;
9878 haprefix.hashValue = (size_t) atts[i+2];
9879 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9880 if (nsIndex == INT_MAX) {
9881 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9882 "Namespace prefix %s for %s on %s is not defined\n",
9883 aprefix, attname, localname);
9884 nsIndex = NS_INDEX_EMPTY;
9885 }
9886 }
9887
9888 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9889 }
9890
9891 /*
9892 * Verify that attribute names are unique.
9893 */
9894 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9895 const xmlChar *nsuri;
9896 unsigned hashValue, nameHashValue, uriHashValue;
9897 int res;
9898
9899 attname = atts[i];
9900 aprefix = atts[i+1];
9901 nsIndex = (ptrdiff_t) atts[i+2];
9902 /* Hash values always have bit 31 set, see dict.c */
9903 nameHashValue = ctxt->attallocs[j] | 0x80000000;
9904
9905 if (nsIndex == NS_INDEX_EMPTY) {
9906 nsuri = NULL;
9907 uriHashValue = URI_HASH_EMPTY;
9908 } else if (nsIndex == NS_INDEX_XML) {
9909 nsuri = ctxt->str_xml_ns;
9910 uriHashValue = URI_HASH_XML;
9911 } else {
9912 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9913 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9914 }
9915
9916 hashValue = xmlCombineHash(nameHashValue, uriHashValue);
9917 res = xmlAttrHashInsert(ctxt, i, &attrHashSize, attname, nsuri,
9918 hashValue);
9919 if (res < 0)
9920 continue;
9921
9922 /*
9923 * [ WFC: Unique Att Spec ]
9924 * No attribute name may appear more than once in the same
9925 * start-tag or empty-element tag.
9926 * As extended by the Namespace in XML REC.
9927 */
9928 if (res < INT_MAX) {
9929 if (aprefix == atts[res+1]) {
9930 xmlErrAttributeDup(ctxt, aprefix, attname);
9931 } else {
9932 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9933 "Namespaced Attribute %s in '%s' redefined\n",
9934 attname, nsuri, NULL);
9935 }
9936 }
9937 }
9938
9939 /*
9940 * Default attributes
9941 */
9942 if (ctxt->attsDefault != NULL) {
9943 xmlDefAttrsPtr defaults;
9944
9945 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9946 if (defaults != NULL) {
9947 for (i = 0; i < defaults->nbAttrs; i++) {
9948 xmlDefAttr *attr = &defaults->attrs[i];
9949 const xmlChar *nsuri;
9950 unsigned hashValue, uriHashValue;
9951 int res;
9952
9953 attname = attr->name.name;
9954 aprefix = attr->prefix.name;
9955
9956 if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9957 continue;
9958 if (aprefix == ctxt->str_xmlns)
9959 continue;
9960
9961 if (aprefix == NULL) {
9962 nsIndex = NS_INDEX_EMPTY;
9963 nsuri = NULL;
9964 uriHashValue = URI_HASH_EMPTY;
9965 } if (aprefix == ctxt->str_xml) {
9966 nsIndex = NS_INDEX_XML;
9967 nsuri = ctxt->str_xml_ns;
9968 uriHashValue = URI_HASH_XML;
9969 } else if (aprefix != NULL) {
9970 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9971 if (nsIndex == INT_MAX) {
9972 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9973 "Namespace prefix %s for %s on %s is not "
9974 "defined\n",
9975 aprefix, attname, localname);
9976 nsIndex = NS_INDEX_EMPTY;
9977 nsuri = NULL;
9978 uriHashValue = URI_HASH_EMPTY;
9979 } else {
9980 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9981 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9982 }
9983 }
9984
9985 /*
9986 * Check whether the attribute exists
9987 */
9988 hashValue = xmlCombineHash(attr->name.hashValue, uriHashValue);
9989 res = xmlAttrHashInsert(ctxt, nbatts, &attrHashSize, attname,
9990 nsuri, hashValue);
9991 if (res < 0)
9992 continue;
9993 if (res < INT_MAX) {
9994 if (aprefix == atts[res+1])
9995 continue;
9996 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9997 "Namespaced Attribute %s in '%s' redefined\n",
9998 attname, nsuri, NULL);
9999 }
10000
10001 xmlParserEntityCheck(ctxt, attr->expandedSize);
10002
10003 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
10004 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
10005 localname = NULL;
10006 goto done;
10007 }
10008 maxatts = ctxt->maxatts;
10009 atts = ctxt->atts;
10010 }
10011
10012 atts[nbatts++] = attname;
10013 atts[nbatts++] = aprefix;
10014 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
10015 atts[nbatts++] = attr->value.name;
10016 atts[nbatts++] = attr->valueEnd;
10017 if ((ctxt->standalone == 1) && (attr->external != 0)) {
10018 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
10019 "standalone: attribute %s on %s defaulted "
10020 "from external subset\n",
10021 attname, localname);
10022 }
10023 nbdef++;
10024 }
10025 }
10026 }
10027
10028 /*
10029 * Reconstruct attribute pointers
10030 */
10031 for (i = 0, j = 0; i < nbatts; i += 5, j++) {
10032 /* namespace URI */
10033 nsIndex = (ptrdiff_t) atts[i+2];
10034 if (nsIndex == INT_MAX)
10035 atts[i+2] = NULL;
10036 else if (nsIndex == INT_MAX - 1)
10037 atts[i+2] = ctxt->str_xml_ns;
10038 else
10039 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
10040
10041 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
10042 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
10043 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
10044 }
10045 }
10046
10047 uri = xmlParserNsLookupUri(ctxt, &hprefix);
10048 if ((prefix != NULL) && (uri == NULL)) {
10049 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
10050 "Namespace prefix %s on %s is not defined\n",
10051 prefix, localname, NULL);
10052 }
10053 *pref = prefix;
10054 *URI = uri;
10055
10056 /*
10057 * SAX callback
10058 */
10059 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
10060 (!ctxt->disableSAX)) {
10061 if (nbNs > 0)
10062 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10063 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
10064 nbatts / 5, nbdef, atts);
10065 else
10066 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10067 0, NULL, nbatts / 5, nbdef, atts);
10068 }
10069
10070 done:
10071 /*
10072 * Free allocated attribute values
10073 */
10074 if (attval != 0) {
10075 for (i = 0, j = 0; j < nratts; i += 5, j++)
10076 if (ctxt->attallocs[j] & 0x80000000)
10077 xmlFree((xmlChar *) atts[i+3]);
10078 }
10079
10080 *nbNsPtr = nbNs;
10081 return(localname);
10082 }
10083
10084 /**
10085 * xmlParseEndTag2:
10086 * @ctxt: an XML parser context
10087 * @line: line of the start tag
10088 * @nsNr: number of namespaces on the start tag
10089 *
10090 * Parse an end tag. Always consumes '</'.
10091 *
10092 * [42] ETag ::= '</' Name S? '>'
10093 *
10094 * With namespace
10095 *
10096 * [NS 9] ETag ::= '</' QName S? '>'
10097 */
10098
10099 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)10100 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
10101 const xmlChar *name;
10102
10103 GROW;
10104 if ((RAW != '<') || (NXT(1) != '/')) {
10105 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
10106 return;
10107 }
10108 SKIP(2);
10109
10110 if (tag->prefix == NULL)
10111 name = xmlParseNameAndCompare(ctxt, ctxt->name);
10112 else
10113 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
10114
10115 /*
10116 * We should definitely be at the ending "S? '>'" part
10117 */
10118 GROW;
10119 if (ctxt->instate == XML_PARSER_EOF)
10120 return;
10121 SKIP_BLANKS;
10122 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
10123 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
10124 } else
10125 NEXT1;
10126
10127 /*
10128 * [ WFC: Element Type Match ]
10129 * The Name in an element's end-tag must match the element type in the
10130 * start-tag.
10131 *
10132 */
10133 if (name != (xmlChar*)1) {
10134 if (name == NULL) name = BAD_CAST "unparsable";
10135 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
10136 "Opening and ending tag mismatch: %s line %d and %s\n",
10137 ctxt->name, tag->line, name);
10138 }
10139
10140 /*
10141 * SAX: End of Tag
10142 */
10143 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10144 (!ctxt->disableSAX))
10145 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
10146 tag->URI);
10147
10148 spacePop(ctxt);
10149 if (tag->nsNr != 0)
10150 xmlParserNsPop(ctxt, tag->nsNr);
10151 }
10152
10153 /**
10154 * xmlParseCDSect:
10155 * @ctxt: an XML parser context
10156 *
10157 * DEPRECATED: Internal function, don't use.
10158 *
10159 * Parse escaped pure raw content. Always consumes '<!['.
10160 *
10161 * [18] CDSect ::= CDStart CData CDEnd
10162 *
10163 * [19] CDStart ::= '<![CDATA['
10164 *
10165 * [20] Data ::= (Char* - (Char* ']]>' Char*))
10166 *
10167 * [21] CDEnd ::= ']]>'
10168 */
10169 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)10170 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
10171 xmlChar *buf = NULL;
10172 int len = 0;
10173 int size = XML_PARSER_BUFFER_SIZE;
10174 int r, rl;
10175 int s, sl;
10176 int cur, l;
10177 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10178 XML_MAX_HUGE_LENGTH :
10179 XML_MAX_TEXT_LENGTH;
10180
10181 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
10182 return;
10183 SKIP(3);
10184
10185 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
10186 return;
10187 SKIP(6);
10188
10189 ctxt->instate = XML_PARSER_CDATA_SECTION;
10190 r = CUR_CHAR(rl);
10191 if (!IS_CHAR(r)) {
10192 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10193 goto out;
10194 }
10195 NEXTL(rl);
10196 s = CUR_CHAR(sl);
10197 if (!IS_CHAR(s)) {
10198 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10199 goto out;
10200 }
10201 NEXTL(sl);
10202 cur = CUR_CHAR(l);
10203 buf = (xmlChar *) xmlMallocAtomic(size);
10204 if (buf == NULL) {
10205 xmlErrMemory(ctxt, NULL);
10206 goto out;
10207 }
10208 while (IS_CHAR(cur) &&
10209 ((r != ']') || (s != ']') || (cur != '>'))) {
10210 if (len + 5 >= size) {
10211 xmlChar *tmp;
10212
10213 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
10214 if (tmp == NULL) {
10215 xmlErrMemory(ctxt, NULL);
10216 goto out;
10217 }
10218 buf = tmp;
10219 size *= 2;
10220 }
10221 COPY_BUF(buf, len, r);
10222 if (len > maxLength) {
10223 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10224 "CData section too big found\n");
10225 goto out;
10226 }
10227 r = s;
10228 rl = sl;
10229 s = cur;
10230 sl = l;
10231 NEXTL(l);
10232 cur = CUR_CHAR(l);
10233 }
10234 buf[len] = 0;
10235 if (ctxt->instate == XML_PARSER_EOF) {
10236 xmlFree(buf);
10237 return;
10238 }
10239 if (cur != '>') {
10240 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10241 "CData section not finished\n%.50s\n", buf);
10242 goto out;
10243 }
10244 NEXTL(l);
10245
10246 /*
10247 * OK the buffer is to be consumed as cdata.
10248 */
10249 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10250 if (ctxt->sax->cdataBlock != NULL)
10251 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10252 else if (ctxt->sax->characters != NULL)
10253 ctxt->sax->characters(ctxt->userData, buf, len);
10254 }
10255
10256 out:
10257 if (ctxt->instate != XML_PARSER_EOF)
10258 ctxt->instate = XML_PARSER_CONTENT;
10259 xmlFree(buf);
10260 }
10261
10262 /**
10263 * xmlParseContentInternal:
10264 * @ctxt: an XML parser context
10265 *
10266 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
10267 * unexpected EOF to the caller.
10268 */
10269
10270 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)10271 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
10272 int nameNr = ctxt->nameNr;
10273
10274 GROW;
10275 while ((ctxt->input->cur < ctxt->input->end) &&
10276 (ctxt->instate != XML_PARSER_EOF)) {
10277 const xmlChar *cur = ctxt->input->cur;
10278
10279 /*
10280 * First case : a Processing Instruction.
10281 */
10282 if ((*cur == '<') && (cur[1] == '?')) {
10283 xmlParsePI(ctxt);
10284 }
10285
10286 /*
10287 * Second case : a CDSection
10288 */
10289 /* 2.6.0 test was *cur not RAW */
10290 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10291 xmlParseCDSect(ctxt);
10292 }
10293
10294 /*
10295 * Third case : a comment
10296 */
10297 else if ((*cur == '<') && (NXT(1) == '!') &&
10298 (NXT(2) == '-') && (NXT(3) == '-')) {
10299 xmlParseComment(ctxt);
10300 ctxt->instate = XML_PARSER_CONTENT;
10301 }
10302
10303 /*
10304 * Fourth case : a sub-element.
10305 */
10306 else if (*cur == '<') {
10307 if (NXT(1) == '/') {
10308 if (ctxt->nameNr <= nameNr)
10309 break;
10310 xmlParseElementEnd(ctxt);
10311 } else {
10312 xmlParseElementStart(ctxt);
10313 }
10314 }
10315
10316 /*
10317 * Fifth case : a reference. If if has not been resolved,
10318 * parsing returns it's Name, create the node
10319 */
10320
10321 else if (*cur == '&') {
10322 xmlParseReference(ctxt);
10323 }
10324
10325 /*
10326 * Last case, text. Note that References are handled directly.
10327 */
10328 else {
10329 xmlParseCharDataInternal(ctxt, 0);
10330 }
10331
10332 SHRINK;
10333 GROW;
10334 }
10335 }
10336
10337 /**
10338 * xmlParseContent:
10339 * @ctxt: an XML parser context
10340 *
10341 * Parse a content sequence. Stops at EOF or '</'.
10342 *
10343 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10344 */
10345
10346 void
xmlParseContent(xmlParserCtxtPtr ctxt)10347 xmlParseContent(xmlParserCtxtPtr ctxt) {
10348 int nameNr = ctxt->nameNr;
10349
10350 xmlParseContentInternal(ctxt);
10351
10352 if ((ctxt->instate != XML_PARSER_EOF) &&
10353 (ctxt->errNo == XML_ERR_OK) &&
10354 (ctxt->nameNr > nameNr)) {
10355 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10356 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10357 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10358 "Premature end of data in tag %s line %d\n",
10359 name, line, NULL);
10360 }
10361 }
10362
10363 /**
10364 * xmlParseElement:
10365 * @ctxt: an XML parser context
10366 *
10367 * DEPRECATED: Internal function, don't use.
10368 *
10369 * parse an XML element
10370 *
10371 * [39] element ::= EmptyElemTag | STag content ETag
10372 *
10373 * [ WFC: Element Type Match ]
10374 * The Name in an element's end-tag must match the element type in the
10375 * start-tag.
10376 *
10377 */
10378
10379 void
xmlParseElement(xmlParserCtxtPtr ctxt)10380 xmlParseElement(xmlParserCtxtPtr ctxt) {
10381 if (xmlParseElementStart(ctxt) != 0)
10382 return;
10383
10384 xmlParseContentInternal(ctxt);
10385 if (ctxt->instate == XML_PARSER_EOF)
10386 return;
10387
10388 if (ctxt->input->cur >= ctxt->input->end) {
10389 if (ctxt->errNo == XML_ERR_OK) {
10390 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10391 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10392 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10393 "Premature end of data in tag %s line %d\n",
10394 name, line, NULL);
10395 }
10396 return;
10397 }
10398
10399 xmlParseElementEnd(ctxt);
10400 }
10401
10402 /**
10403 * xmlParseElementStart:
10404 * @ctxt: an XML parser context
10405 *
10406 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10407 * opening tag was parsed, 1 if an empty element was parsed.
10408 *
10409 * Always consumes '<'.
10410 */
10411 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)10412 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10413 const xmlChar *name;
10414 const xmlChar *prefix = NULL;
10415 const xmlChar *URI = NULL;
10416 xmlParserNodeInfo node_info;
10417 int line;
10418 xmlNodePtr cur;
10419 int nbNs = 0;
10420
10421 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10422 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10423 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10424 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10425 xmlParserMaxDepth);
10426 xmlHaltParser(ctxt);
10427 return(-1);
10428 }
10429
10430 /* Capture start position */
10431 if (ctxt->record_info) {
10432 node_info.begin_pos = ctxt->input->consumed +
10433 (CUR_PTR - ctxt->input->base);
10434 node_info.begin_line = ctxt->input->line;
10435 }
10436
10437 if (ctxt->spaceNr == 0)
10438 spacePush(ctxt, -1);
10439 else if (*ctxt->space == -2)
10440 spacePush(ctxt, -1);
10441 else
10442 spacePush(ctxt, *ctxt->space);
10443
10444 line = ctxt->input->line;
10445 #ifdef LIBXML_SAX1_ENABLED
10446 if (ctxt->sax2)
10447 #endif /* LIBXML_SAX1_ENABLED */
10448 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10449 #ifdef LIBXML_SAX1_ENABLED
10450 else
10451 name = xmlParseStartTag(ctxt);
10452 #endif /* LIBXML_SAX1_ENABLED */
10453 if (ctxt->instate == XML_PARSER_EOF)
10454 return(-1);
10455 if (name == NULL) {
10456 spacePop(ctxt);
10457 return(-1);
10458 }
10459 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10460 cur = ctxt->node;
10461
10462 #ifdef LIBXML_VALID_ENABLED
10463 /*
10464 * [ VC: Root Element Type ]
10465 * The Name in the document type declaration must match the element
10466 * type of the root element.
10467 */
10468 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10469 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10470 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10471 #endif /* LIBXML_VALID_ENABLED */
10472
10473 /*
10474 * Check for an Empty Element.
10475 */
10476 if ((RAW == '/') && (NXT(1) == '>')) {
10477 SKIP(2);
10478 if (ctxt->sax2) {
10479 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10480 (!ctxt->disableSAX))
10481 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10482 #ifdef LIBXML_SAX1_ENABLED
10483 } else {
10484 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10485 (!ctxt->disableSAX))
10486 ctxt->sax->endElement(ctxt->userData, name);
10487 #endif /* LIBXML_SAX1_ENABLED */
10488 }
10489 namePop(ctxt);
10490 spacePop(ctxt);
10491 if (nbNs > 0)
10492 xmlParserNsPop(ctxt, nbNs);
10493 if (cur != NULL && ctxt->record_info) {
10494 node_info.node = cur;
10495 node_info.end_pos = ctxt->input->consumed +
10496 (CUR_PTR - ctxt->input->base);
10497 node_info.end_line = ctxt->input->line;
10498 xmlParserAddNodeInfo(ctxt, &node_info);
10499 }
10500 return(1);
10501 }
10502 if (RAW == '>') {
10503 NEXT1;
10504 if (cur != NULL && ctxt->record_info) {
10505 node_info.node = cur;
10506 node_info.end_pos = 0;
10507 node_info.end_line = 0;
10508 xmlParserAddNodeInfo(ctxt, &node_info);
10509 }
10510 } else {
10511 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10512 "Couldn't find end of Start Tag %s line %d\n",
10513 name, line, NULL);
10514
10515 /*
10516 * end of parsing of this node.
10517 */
10518 nodePop(ctxt);
10519 namePop(ctxt);
10520 spacePop(ctxt);
10521 if (nbNs > 0)
10522 xmlParserNsPop(ctxt, nbNs);
10523 return(-1);
10524 }
10525
10526 return(0);
10527 }
10528
10529 /**
10530 * xmlParseElementEnd:
10531 * @ctxt: an XML parser context
10532 *
10533 * Parse the end of an XML element. Always consumes '</'.
10534 */
10535 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)10536 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10537 xmlNodePtr cur = ctxt->node;
10538
10539 if (ctxt->nameNr <= 0) {
10540 if ((RAW == '<') && (NXT(1) == '/'))
10541 SKIP(2);
10542 return;
10543 }
10544
10545 /*
10546 * parse the end of tag: '</' should be here.
10547 */
10548 if (ctxt->sax2) {
10549 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10550 namePop(ctxt);
10551 }
10552 #ifdef LIBXML_SAX1_ENABLED
10553 else
10554 xmlParseEndTag1(ctxt, 0);
10555 #endif /* LIBXML_SAX1_ENABLED */
10556
10557 /*
10558 * Capture end position
10559 */
10560 if (cur != NULL && ctxt->record_info) {
10561 xmlParserNodeInfoPtr node_info;
10562
10563 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10564 if (node_info != NULL) {
10565 node_info->end_pos = ctxt->input->consumed +
10566 (CUR_PTR - ctxt->input->base);
10567 node_info->end_line = ctxt->input->line;
10568 }
10569 }
10570 }
10571
10572 /**
10573 * xmlParseVersionNum:
10574 * @ctxt: an XML parser context
10575 *
10576 * DEPRECATED: Internal function, don't use.
10577 *
10578 * parse the XML version value.
10579 *
10580 * [26] VersionNum ::= '1.' [0-9]+
10581 *
10582 * In practice allow [0-9].[0-9]+ at that level
10583 *
10584 * Returns the string giving the XML version number, or NULL
10585 */
10586 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10587 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10588 xmlChar *buf = NULL;
10589 int len = 0;
10590 int size = 10;
10591 xmlChar cur;
10592
10593 buf = (xmlChar *) xmlMallocAtomic(size);
10594 if (buf == NULL) {
10595 xmlErrMemory(ctxt, NULL);
10596 return(NULL);
10597 }
10598 cur = CUR;
10599 if (!((cur >= '0') && (cur <= '9'))) {
10600 xmlFree(buf);
10601 return(NULL);
10602 }
10603 buf[len++] = cur;
10604 NEXT;
10605 cur=CUR;
10606 if (cur != '.') {
10607 xmlFree(buf);
10608 return(NULL);
10609 }
10610 buf[len++] = cur;
10611 NEXT;
10612 cur=CUR;
10613 while ((cur >= '0') && (cur <= '9')) {
10614 if (len + 1 >= size) {
10615 xmlChar *tmp;
10616
10617 size *= 2;
10618 tmp = (xmlChar *) xmlRealloc(buf, size);
10619 if (tmp == NULL) {
10620 xmlFree(buf);
10621 xmlErrMemory(ctxt, NULL);
10622 return(NULL);
10623 }
10624 buf = tmp;
10625 }
10626 buf[len++] = cur;
10627 NEXT;
10628 cur=CUR;
10629 }
10630 buf[len] = 0;
10631 return(buf);
10632 }
10633
10634 /**
10635 * xmlParseVersionInfo:
10636 * @ctxt: an XML parser context
10637 *
10638 * DEPRECATED: Internal function, don't use.
10639 *
10640 * parse the XML version.
10641 *
10642 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10643 *
10644 * [25] Eq ::= S? '=' S?
10645 *
10646 * Returns the version string, e.g. "1.0"
10647 */
10648
10649 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10650 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10651 xmlChar *version = NULL;
10652
10653 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10654 SKIP(7);
10655 SKIP_BLANKS;
10656 if (RAW != '=') {
10657 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10658 return(NULL);
10659 }
10660 NEXT;
10661 SKIP_BLANKS;
10662 if (RAW == '"') {
10663 NEXT;
10664 version = xmlParseVersionNum(ctxt);
10665 if (RAW != '"') {
10666 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10667 } else
10668 NEXT;
10669 } else if (RAW == '\''){
10670 NEXT;
10671 version = xmlParseVersionNum(ctxt);
10672 if (RAW != '\'') {
10673 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10674 } else
10675 NEXT;
10676 } else {
10677 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10678 }
10679 }
10680 return(version);
10681 }
10682
10683 /**
10684 * xmlParseEncName:
10685 * @ctxt: an XML parser context
10686 *
10687 * DEPRECATED: Internal function, don't use.
10688 *
10689 * parse the XML encoding name
10690 *
10691 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10692 *
10693 * Returns the encoding name value or NULL
10694 */
10695 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10696 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10697 xmlChar *buf = NULL;
10698 int len = 0;
10699 int size = 10;
10700 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10701 XML_MAX_TEXT_LENGTH :
10702 XML_MAX_NAME_LENGTH;
10703 xmlChar cur;
10704
10705 cur = CUR;
10706 if (((cur >= 'a') && (cur <= 'z')) ||
10707 ((cur >= 'A') && (cur <= 'Z'))) {
10708 buf = (xmlChar *) xmlMallocAtomic(size);
10709 if (buf == NULL) {
10710 xmlErrMemory(ctxt, NULL);
10711 return(NULL);
10712 }
10713
10714 buf[len++] = cur;
10715 NEXT;
10716 cur = CUR;
10717 while (((cur >= 'a') && (cur <= 'z')) ||
10718 ((cur >= 'A') && (cur <= 'Z')) ||
10719 ((cur >= '0') && (cur <= '9')) ||
10720 (cur == '.') || (cur == '_') ||
10721 (cur == '-')) {
10722 if (len + 1 >= size) {
10723 xmlChar *tmp;
10724
10725 size *= 2;
10726 tmp = (xmlChar *) xmlRealloc(buf, size);
10727 if (tmp == NULL) {
10728 xmlErrMemory(ctxt, NULL);
10729 xmlFree(buf);
10730 return(NULL);
10731 }
10732 buf = tmp;
10733 }
10734 buf[len++] = cur;
10735 if (len > maxLength) {
10736 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10737 xmlFree(buf);
10738 return(NULL);
10739 }
10740 NEXT;
10741 cur = CUR;
10742 }
10743 buf[len] = 0;
10744 } else {
10745 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10746 }
10747 return(buf);
10748 }
10749
10750 /**
10751 * xmlParseEncodingDecl:
10752 * @ctxt: an XML parser context
10753 *
10754 * DEPRECATED: Internal function, don't use.
10755 *
10756 * parse the XML encoding declaration
10757 *
10758 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10759 *
10760 * this setups the conversion filters.
10761 *
10762 * Returns the encoding value or NULL
10763 */
10764
10765 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10766 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10767 xmlChar *encoding = NULL;
10768
10769 SKIP_BLANKS;
10770 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10771 return(NULL);
10772
10773 SKIP(8);
10774 SKIP_BLANKS;
10775 if (RAW != '=') {
10776 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10777 return(NULL);
10778 }
10779 NEXT;
10780 SKIP_BLANKS;
10781 if (RAW == '"') {
10782 NEXT;
10783 encoding = xmlParseEncName(ctxt);
10784 if (RAW != '"') {
10785 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10786 xmlFree((xmlChar *) encoding);
10787 return(NULL);
10788 } else
10789 NEXT;
10790 } else if (RAW == '\''){
10791 NEXT;
10792 encoding = xmlParseEncName(ctxt);
10793 if (RAW != '\'') {
10794 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10795 xmlFree((xmlChar *) encoding);
10796 return(NULL);
10797 } else
10798 NEXT;
10799 } else {
10800 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10801 }
10802
10803 if (encoding == NULL)
10804 return(NULL);
10805
10806 xmlSetDeclaredEncoding(ctxt, encoding);
10807
10808 return(ctxt->encoding);
10809 }
10810
10811 /**
10812 * xmlParseSDDecl:
10813 * @ctxt: an XML parser context
10814 *
10815 * DEPRECATED: Internal function, don't use.
10816 *
10817 * parse the XML standalone declaration
10818 *
10819 * [32] SDDecl ::= S 'standalone' Eq
10820 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10821 *
10822 * [ VC: Standalone Document Declaration ]
10823 * TODO The standalone document declaration must have the value "no"
10824 * if any external markup declarations contain declarations of:
10825 * - attributes with default values, if elements to which these
10826 * attributes apply appear in the document without specifications
10827 * of values for these attributes, or
10828 * - entities (other than amp, lt, gt, apos, quot), if references
10829 * to those entities appear in the document, or
10830 * - attributes with values subject to normalization, where the
10831 * attribute appears in the document with a value which will change
10832 * as a result of normalization, or
10833 * - element types with element content, if white space occurs directly
10834 * within any instance of those types.
10835 *
10836 * Returns:
10837 * 1 if standalone="yes"
10838 * 0 if standalone="no"
10839 * -2 if standalone attribute is missing or invalid
10840 * (A standalone value of -2 means that the XML declaration was found,
10841 * but no value was specified for the standalone attribute).
10842 */
10843
10844 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10845 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10846 int standalone = -2;
10847
10848 SKIP_BLANKS;
10849 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10850 SKIP(10);
10851 SKIP_BLANKS;
10852 if (RAW != '=') {
10853 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10854 return(standalone);
10855 }
10856 NEXT;
10857 SKIP_BLANKS;
10858 if (RAW == '\''){
10859 NEXT;
10860 if ((RAW == 'n') && (NXT(1) == 'o')) {
10861 standalone = 0;
10862 SKIP(2);
10863 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10864 (NXT(2) == 's')) {
10865 standalone = 1;
10866 SKIP(3);
10867 } else {
10868 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10869 }
10870 if (RAW != '\'') {
10871 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10872 } else
10873 NEXT;
10874 } else if (RAW == '"'){
10875 NEXT;
10876 if ((RAW == 'n') && (NXT(1) == 'o')) {
10877 standalone = 0;
10878 SKIP(2);
10879 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10880 (NXT(2) == 's')) {
10881 standalone = 1;
10882 SKIP(3);
10883 } else {
10884 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10885 }
10886 if (RAW != '"') {
10887 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10888 } else
10889 NEXT;
10890 } else {
10891 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10892 }
10893 }
10894 return(standalone);
10895 }
10896
10897 /**
10898 * xmlParseXMLDecl:
10899 * @ctxt: an XML parser context
10900 *
10901 * DEPRECATED: Internal function, don't use.
10902 *
10903 * parse an XML declaration header
10904 *
10905 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10906 */
10907
10908 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10909 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10910 xmlChar *version;
10911
10912 /*
10913 * This value for standalone indicates that the document has an
10914 * XML declaration but it does not have a standalone attribute.
10915 * It will be overwritten later if a standalone attribute is found.
10916 */
10917
10918 ctxt->standalone = -2;
10919
10920 /*
10921 * We know that '<?xml' is here.
10922 */
10923 SKIP(5);
10924
10925 if (!IS_BLANK_CH(RAW)) {
10926 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10927 "Blank needed after '<?xml'\n");
10928 }
10929 SKIP_BLANKS;
10930
10931 /*
10932 * We must have the VersionInfo here.
10933 */
10934 version = xmlParseVersionInfo(ctxt);
10935 if (version == NULL) {
10936 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10937 } else {
10938 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10939 /*
10940 * Changed here for XML-1.0 5th edition
10941 */
10942 if (ctxt->options & XML_PARSE_OLD10) {
10943 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10944 "Unsupported version '%s'\n",
10945 version);
10946 } else {
10947 if ((version[0] == '1') && ((version[1] == '.'))) {
10948 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10949 "Unsupported version '%s'\n",
10950 version, NULL);
10951 } else {
10952 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10953 "Unsupported version '%s'\n",
10954 version);
10955 }
10956 }
10957 }
10958 if (ctxt->version != NULL)
10959 xmlFree((void *) ctxt->version);
10960 ctxt->version = version;
10961 }
10962
10963 /*
10964 * We may have the encoding declaration
10965 */
10966 if (!IS_BLANK_CH(RAW)) {
10967 if ((RAW == '?') && (NXT(1) == '>')) {
10968 SKIP(2);
10969 return;
10970 }
10971 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10972 }
10973 xmlParseEncodingDecl(ctxt);
10974 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10975 (ctxt->instate == XML_PARSER_EOF)) {
10976 /*
10977 * The XML REC instructs us to stop parsing right here
10978 */
10979 return;
10980 }
10981
10982 /*
10983 * We may have the standalone status.
10984 */
10985 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10986 if ((RAW == '?') && (NXT(1) == '>')) {
10987 SKIP(2);
10988 return;
10989 }
10990 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10991 }
10992
10993 /*
10994 * We can grow the input buffer freely at that point
10995 */
10996 GROW;
10997
10998 SKIP_BLANKS;
10999 ctxt->standalone = xmlParseSDDecl(ctxt);
11000
11001 SKIP_BLANKS;
11002 if ((RAW == '?') && (NXT(1) == '>')) {
11003 SKIP(2);
11004 } else if (RAW == '>') {
11005 /* Deprecated old WD ... */
11006 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11007 NEXT;
11008 } else {
11009 int c;
11010
11011 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11012 while ((c = CUR) != 0) {
11013 NEXT;
11014 if (c == '>')
11015 break;
11016 }
11017 }
11018 }
11019
11020 /**
11021 * xmlParseMisc:
11022 * @ctxt: an XML parser context
11023 *
11024 * DEPRECATED: Internal function, don't use.
11025 *
11026 * parse an XML Misc* optional field.
11027 *
11028 * [27] Misc ::= Comment | PI | S
11029 */
11030
11031 void
xmlParseMisc(xmlParserCtxtPtr ctxt)11032 xmlParseMisc(xmlParserCtxtPtr ctxt) {
11033 while (ctxt->instate != XML_PARSER_EOF) {
11034 SKIP_BLANKS;
11035 GROW;
11036 if ((RAW == '<') && (NXT(1) == '?')) {
11037 xmlParsePI(ctxt);
11038 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
11039 xmlParseComment(ctxt);
11040 } else {
11041 break;
11042 }
11043 }
11044 }
11045
11046 /**
11047 * xmlParseDocument:
11048 * @ctxt: an XML parser context
11049 *
11050 * parse an XML document (and build a tree if using the standard SAX
11051 * interface).
11052 *
11053 * [1] document ::= prolog element Misc*
11054 *
11055 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
11056 *
11057 * Returns 0, -1 in case of error. the parser context is augmented
11058 * as a result of the parsing.
11059 */
11060
11061 int
xmlParseDocument(xmlParserCtxtPtr ctxt)11062 xmlParseDocument(xmlParserCtxtPtr ctxt) {
11063 xmlInitParser();
11064
11065 if ((ctxt == NULL) || (ctxt->input == NULL))
11066 return(-1);
11067
11068 GROW;
11069
11070 /*
11071 * SAX: detecting the level.
11072 */
11073 xmlDetectSAX2(ctxt);
11074
11075 /*
11076 * SAX: beginning of the document processing.
11077 */
11078 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11079 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11080 if (ctxt->instate == XML_PARSER_EOF)
11081 return(-1);
11082
11083 xmlDetectEncoding(ctxt);
11084
11085 if (CUR == 0) {
11086 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11087 return(-1);
11088 }
11089
11090 GROW;
11091 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11092
11093 /*
11094 * Note that we will switch encoding on the fly.
11095 */
11096 xmlParseXMLDecl(ctxt);
11097 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
11098 (ctxt->instate == XML_PARSER_EOF)) {
11099 /*
11100 * The XML REC instructs us to stop parsing right here
11101 */
11102 return(-1);
11103 }
11104 SKIP_BLANKS;
11105 } else {
11106 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11107 }
11108 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11109 ctxt->sax->startDocument(ctxt->userData);
11110 if (ctxt->instate == XML_PARSER_EOF)
11111 return(-1);
11112 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
11113 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
11114 ctxt->myDoc->compression = ctxt->input->buf->compressed;
11115 }
11116
11117 /*
11118 * The Misc part of the Prolog
11119 */
11120 xmlParseMisc(ctxt);
11121
11122 /*
11123 * Then possibly doc type declaration(s) and more Misc
11124 * (doctypedecl Misc*)?
11125 */
11126 GROW;
11127 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
11128
11129 ctxt->inSubset = 1;
11130 xmlParseDocTypeDecl(ctxt);
11131 if (RAW == '[') {
11132 ctxt->instate = XML_PARSER_DTD;
11133 xmlParseInternalSubset(ctxt);
11134 if (ctxt->instate == XML_PARSER_EOF)
11135 return(-1);
11136 }
11137
11138 /*
11139 * Create and update the external subset.
11140 */
11141 ctxt->inSubset = 2;
11142 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
11143 (!ctxt->disableSAX))
11144 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11145 ctxt->extSubSystem, ctxt->extSubURI);
11146 if (ctxt->instate == XML_PARSER_EOF)
11147 return(-1);
11148 ctxt->inSubset = 0;
11149
11150 xmlCleanSpecialAttr(ctxt);
11151
11152 ctxt->instate = XML_PARSER_PROLOG;
11153 xmlParseMisc(ctxt);
11154 }
11155
11156 /*
11157 * Time to start parsing the tree itself
11158 */
11159 GROW;
11160 if (RAW != '<') {
11161 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11162 "Start tag expected, '<' not found\n");
11163 } else {
11164 ctxt->instate = XML_PARSER_CONTENT;
11165 xmlParseElement(ctxt);
11166 ctxt->instate = XML_PARSER_EPILOG;
11167
11168
11169 /*
11170 * The Misc part at the end
11171 */
11172 xmlParseMisc(ctxt);
11173
11174 if (ctxt->input->cur < ctxt->input->end) {
11175 if (ctxt->errNo == XML_ERR_OK)
11176 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11177 } else if ((ctxt->input->buf != NULL) &&
11178 (ctxt->input->buf->encoder != NULL) &&
11179 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11180 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11181 "Truncated multi-byte sequence at EOF\n");
11182 }
11183 ctxt->instate = XML_PARSER_EOF;
11184 }
11185
11186 /*
11187 * SAX: end of the document processing.
11188 */
11189 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11190 ctxt->sax->endDocument(ctxt->userData);
11191
11192 /*
11193 * Remove locally kept entity definitions if the tree was not built
11194 */
11195 if ((ctxt->myDoc != NULL) &&
11196 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
11197 xmlFreeDoc(ctxt->myDoc);
11198 ctxt->myDoc = NULL;
11199 }
11200
11201 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
11202 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11203 if (ctxt->valid)
11204 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11205 if (ctxt->nsWellFormed)
11206 ctxt->myDoc->properties |= XML_DOC_NSVALID;
11207 if (ctxt->options & XML_PARSE_OLD10)
11208 ctxt->myDoc->properties |= XML_DOC_OLD10;
11209 }
11210 if (! ctxt->wellFormed) {
11211 ctxt->valid = 0;
11212 return(-1);
11213 }
11214 return(0);
11215 }
11216
11217 /**
11218 * xmlParseExtParsedEnt:
11219 * @ctxt: an XML parser context
11220 *
11221 * parse a general parsed entity
11222 * An external general parsed entity is well-formed if it matches the
11223 * production labeled extParsedEnt.
11224 *
11225 * [78] extParsedEnt ::= TextDecl? content
11226 *
11227 * Returns 0, -1 in case of error. the parser context is augmented
11228 * as a result of the parsing.
11229 */
11230
11231 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)11232 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11233 if ((ctxt == NULL) || (ctxt->input == NULL))
11234 return(-1);
11235
11236 xmlDetectSAX2(ctxt);
11237
11238 /*
11239 * SAX: beginning of the document processing.
11240 */
11241 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11242 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11243
11244 xmlDetectEncoding(ctxt);
11245
11246 if (CUR == 0) {
11247 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11248 }
11249
11250 /*
11251 * Check for the XMLDecl in the Prolog.
11252 */
11253 GROW;
11254 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11255
11256 /*
11257 * Note that we will switch encoding on the fly.
11258 */
11259 xmlParseXMLDecl(ctxt);
11260 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11261 /*
11262 * The XML REC instructs us to stop parsing right here
11263 */
11264 return(-1);
11265 }
11266 SKIP_BLANKS;
11267 } else {
11268 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11269 }
11270 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11271 ctxt->sax->startDocument(ctxt->userData);
11272 if (ctxt->instate == XML_PARSER_EOF)
11273 return(-1);
11274
11275 /*
11276 * Doing validity checking on chunk doesn't make sense
11277 */
11278 ctxt->instate = XML_PARSER_CONTENT;
11279 ctxt->validate = 0;
11280 ctxt->loadsubset = 0;
11281 ctxt->depth = 0;
11282
11283 xmlParseContent(ctxt);
11284 if (ctxt->instate == XML_PARSER_EOF)
11285 return(-1);
11286
11287 if ((RAW == '<') && (NXT(1) == '/')) {
11288 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11289 } else if (RAW != 0) {
11290 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11291 }
11292
11293 /*
11294 * SAX: end of the document processing.
11295 */
11296 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11297 ctxt->sax->endDocument(ctxt->userData);
11298
11299 if (! ctxt->wellFormed) return(-1);
11300 return(0);
11301 }
11302
11303 #ifdef LIBXML_PUSH_ENABLED
11304 /************************************************************************
11305 * *
11306 * Progressive parsing interfaces *
11307 * *
11308 ************************************************************************/
11309
11310 /**
11311 * xmlParseLookupChar:
11312 * @ctxt: an XML parser context
11313 * @c: character
11314 *
11315 * Check whether the input buffer contains a character.
11316 */
11317 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)11318 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11319 const xmlChar *cur;
11320
11321 if (ctxt->checkIndex == 0) {
11322 cur = ctxt->input->cur + 1;
11323 } else {
11324 cur = ctxt->input->cur + ctxt->checkIndex;
11325 }
11326
11327 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11328 size_t index = ctxt->input->end - ctxt->input->cur;
11329
11330 if (index > LONG_MAX) {
11331 ctxt->checkIndex = 0;
11332 return(1);
11333 }
11334 ctxt->checkIndex = index;
11335 return(0);
11336 } else {
11337 ctxt->checkIndex = 0;
11338 return(1);
11339 }
11340 }
11341
11342 /**
11343 * xmlParseLookupString:
11344 * @ctxt: an XML parser context
11345 * @startDelta: delta to apply at the start
11346 * @str: string
11347 * @strLen: length of string
11348 *
11349 * Check whether the input buffer contains a string.
11350 */
11351 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)11352 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11353 const char *str, size_t strLen) {
11354 const xmlChar *cur, *term;
11355
11356 if (ctxt->checkIndex == 0) {
11357 cur = ctxt->input->cur + startDelta;
11358 } else {
11359 cur = ctxt->input->cur + ctxt->checkIndex;
11360 }
11361
11362 term = BAD_CAST strstr((const char *) cur, str);
11363 if (term == NULL) {
11364 const xmlChar *end = ctxt->input->end;
11365 size_t index;
11366
11367 /* Rescan (strLen - 1) characters. */
11368 if ((size_t) (end - cur) < strLen)
11369 end = cur;
11370 else
11371 end -= strLen - 1;
11372 index = end - ctxt->input->cur;
11373 if (index > LONG_MAX) {
11374 ctxt->checkIndex = 0;
11375 return(ctxt->input->end - strLen);
11376 }
11377 ctxt->checkIndex = index;
11378 } else {
11379 ctxt->checkIndex = 0;
11380 }
11381
11382 return(term);
11383 }
11384
11385 /**
11386 * xmlParseLookupCharData:
11387 * @ctxt: an XML parser context
11388 *
11389 * Check whether the input buffer contains terminated char data.
11390 */
11391 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)11392 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11393 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11394 const xmlChar *end = ctxt->input->end;
11395 size_t index;
11396
11397 while (cur < end) {
11398 if ((*cur == '<') || (*cur == '&')) {
11399 ctxt->checkIndex = 0;
11400 return(1);
11401 }
11402 cur++;
11403 }
11404
11405 index = cur - ctxt->input->cur;
11406 if (index > LONG_MAX) {
11407 ctxt->checkIndex = 0;
11408 return(1);
11409 }
11410 ctxt->checkIndex = index;
11411 return(0);
11412 }
11413
11414 /**
11415 * xmlParseLookupGt:
11416 * @ctxt: an XML parser context
11417 *
11418 * Check whether there's enough data in the input buffer to finish parsing
11419 * a start tag. This has to take quotes into account.
11420 */
11421 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)11422 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11423 const xmlChar *cur;
11424 const xmlChar *end = ctxt->input->end;
11425 int state = ctxt->endCheckState;
11426 size_t index;
11427
11428 if (ctxt->checkIndex == 0)
11429 cur = ctxt->input->cur + 1;
11430 else
11431 cur = ctxt->input->cur + ctxt->checkIndex;
11432
11433 while (cur < end) {
11434 if (state) {
11435 if (*cur == state)
11436 state = 0;
11437 } else if (*cur == '\'' || *cur == '"') {
11438 state = *cur;
11439 } else if (*cur == '>') {
11440 ctxt->checkIndex = 0;
11441 ctxt->endCheckState = 0;
11442 return(1);
11443 }
11444 cur++;
11445 }
11446
11447 index = cur - ctxt->input->cur;
11448 if (index > LONG_MAX) {
11449 ctxt->checkIndex = 0;
11450 ctxt->endCheckState = 0;
11451 return(1);
11452 }
11453 ctxt->checkIndex = index;
11454 ctxt->endCheckState = state;
11455 return(0);
11456 }
11457
11458 /**
11459 * xmlParseLookupInternalSubset:
11460 * @ctxt: an XML parser context
11461 *
11462 * Check whether there's enough data in the input buffer to finish parsing
11463 * the internal subset.
11464 */
11465 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)11466 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11467 /*
11468 * Sorry, but progressive parsing of the internal subset is not
11469 * supported. We first check that the full content of the internal
11470 * subset is available and parsing is launched only at that point.
11471 * Internal subset ends with "']' S? '>'" in an unescaped section and
11472 * not in a ']]>' sequence which are conditional sections.
11473 */
11474 const xmlChar *cur, *start;
11475 const xmlChar *end = ctxt->input->end;
11476 int state = ctxt->endCheckState;
11477 size_t index;
11478
11479 if (ctxt->checkIndex == 0) {
11480 cur = ctxt->input->cur + 1;
11481 } else {
11482 cur = ctxt->input->cur + ctxt->checkIndex;
11483 }
11484 start = cur;
11485
11486 while (cur < end) {
11487 if (state == '-') {
11488 if ((*cur == '-') &&
11489 (cur[1] == '-') &&
11490 (cur[2] == '>')) {
11491 state = 0;
11492 cur += 3;
11493 start = cur;
11494 continue;
11495 }
11496 }
11497 else if (state == ']') {
11498 if (*cur == '>') {
11499 ctxt->checkIndex = 0;
11500 ctxt->endCheckState = 0;
11501 return(1);
11502 }
11503 if (IS_BLANK_CH(*cur)) {
11504 state = ' ';
11505 } else if (*cur != ']') {
11506 state = 0;
11507 start = cur;
11508 continue;
11509 }
11510 }
11511 else if (state == ' ') {
11512 if (*cur == '>') {
11513 ctxt->checkIndex = 0;
11514 ctxt->endCheckState = 0;
11515 return(1);
11516 }
11517 if (!IS_BLANK_CH(*cur)) {
11518 state = 0;
11519 start = cur;
11520 continue;
11521 }
11522 }
11523 else if (state != 0) {
11524 if (*cur == state) {
11525 state = 0;
11526 start = cur + 1;
11527 }
11528 }
11529 else if (*cur == '<') {
11530 if ((cur[1] == '!') &&
11531 (cur[2] == '-') &&
11532 (cur[3] == '-')) {
11533 state = '-';
11534 cur += 4;
11535 /* Don't treat <!--> as comment */
11536 start = cur;
11537 continue;
11538 }
11539 }
11540 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11541 state = *cur;
11542 }
11543
11544 cur++;
11545 }
11546
11547 /*
11548 * Rescan the three last characters to detect "<!--" and "-->"
11549 * split across chunks.
11550 */
11551 if ((state == 0) || (state == '-')) {
11552 if (cur - start < 3)
11553 cur = start;
11554 else
11555 cur -= 3;
11556 }
11557 index = cur - ctxt->input->cur;
11558 if (index > LONG_MAX) {
11559 ctxt->checkIndex = 0;
11560 ctxt->endCheckState = 0;
11561 return(1);
11562 }
11563 ctxt->checkIndex = index;
11564 ctxt->endCheckState = state;
11565 return(0);
11566 }
11567
11568 /**
11569 * xmlCheckCdataPush:
11570 * @cur: pointer to the block of characters
11571 * @len: length of the block in bytes
11572 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11573 *
11574 * Check that the block of characters is okay as SCdata content [20]
11575 *
11576 * Returns the number of bytes to pass if okay, a negative index where an
11577 * UTF-8 error occurred otherwise
11578 */
11579 static int
xmlCheckCdataPush(const xmlChar * utf,int len,int complete)11580 xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11581 int ix;
11582 unsigned char c;
11583 int codepoint;
11584
11585 if ((utf == NULL) || (len <= 0))
11586 return(0);
11587
11588 for (ix = 0; ix < len;) { /* string is 0-terminated */
11589 c = utf[ix];
11590 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11591 if (c >= 0x20)
11592 ix++;
11593 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11594 ix++;
11595 else
11596 return(-ix);
11597 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11598 if (ix + 2 > len) return(complete ? -ix : ix);
11599 if ((utf[ix+1] & 0xc0 ) != 0x80)
11600 return(-ix);
11601 codepoint = (utf[ix] & 0x1f) << 6;
11602 codepoint |= utf[ix+1] & 0x3f;
11603 if (!xmlIsCharQ(codepoint))
11604 return(-ix);
11605 ix += 2;
11606 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11607 if (ix + 3 > len) return(complete ? -ix : ix);
11608 if (((utf[ix+1] & 0xc0) != 0x80) ||
11609 ((utf[ix+2] & 0xc0) != 0x80))
11610 return(-ix);
11611 codepoint = (utf[ix] & 0xf) << 12;
11612 codepoint |= (utf[ix+1] & 0x3f) << 6;
11613 codepoint |= utf[ix+2] & 0x3f;
11614 if (!xmlIsCharQ(codepoint))
11615 return(-ix);
11616 ix += 3;
11617 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11618 if (ix + 4 > len) return(complete ? -ix : ix);
11619 if (((utf[ix+1] & 0xc0) != 0x80) ||
11620 ((utf[ix+2] & 0xc0) != 0x80) ||
11621 ((utf[ix+3] & 0xc0) != 0x80))
11622 return(-ix);
11623 codepoint = (utf[ix] & 0x7) << 18;
11624 codepoint |= (utf[ix+1] & 0x3f) << 12;
11625 codepoint |= (utf[ix+2] & 0x3f) << 6;
11626 codepoint |= utf[ix+3] & 0x3f;
11627 if (!xmlIsCharQ(codepoint))
11628 return(-ix);
11629 ix += 4;
11630 } else /* unknown encoding */
11631 return(-ix);
11632 }
11633 return(ix);
11634 }
11635
11636 /**
11637 * xmlParseTryOrFinish:
11638 * @ctxt: an XML parser context
11639 * @terminate: last chunk indicator
11640 *
11641 * Try to progress on parsing
11642 *
11643 * Returns zero if no parsing was possible
11644 */
11645 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11646 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11647 int ret = 0;
11648 size_t avail;
11649 xmlChar cur, next;
11650
11651 if (ctxt->input == NULL)
11652 return(0);
11653
11654 if ((ctxt->input != NULL) &&
11655 (ctxt->input->cur - ctxt->input->base > 4096)) {
11656 xmlParserShrink(ctxt);
11657 }
11658
11659 while (ctxt->instate != XML_PARSER_EOF) {
11660 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11661 return(0);
11662
11663 avail = ctxt->input->end - ctxt->input->cur;
11664 if (avail < 1)
11665 goto done;
11666 switch (ctxt->instate) {
11667 case XML_PARSER_EOF:
11668 /*
11669 * Document parsing is done !
11670 */
11671 goto done;
11672 case XML_PARSER_START:
11673 /*
11674 * Very first chars read from the document flow.
11675 */
11676 if ((!terminate) && (avail < 4))
11677 goto done;
11678
11679 /*
11680 * We need more bytes to detect EBCDIC code pages.
11681 * See xmlDetectEBCDIC.
11682 */
11683 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11684 (!terminate) && (avail < 200))
11685 goto done;
11686
11687 xmlDetectEncoding(ctxt);
11688 if (ctxt->instate == XML_PARSER_EOF)
11689 goto done;
11690 ctxt->instate = XML_PARSER_XML_DECL;
11691 break;
11692
11693 case XML_PARSER_XML_DECL:
11694 if ((!terminate) && (avail < 2))
11695 goto done;
11696 cur = ctxt->input->cur[0];
11697 next = ctxt->input->cur[1];
11698 if ((cur == '<') && (next == '?')) {
11699 /* PI or XML decl */
11700 if ((!terminate) &&
11701 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11702 goto done;
11703 if ((ctxt->input->cur[2] == 'x') &&
11704 (ctxt->input->cur[3] == 'm') &&
11705 (ctxt->input->cur[4] == 'l') &&
11706 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11707 ret += 5;
11708 xmlParseXMLDecl(ctxt);
11709 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11710 /*
11711 * The XML REC instructs us to stop parsing right
11712 * here
11713 */
11714 xmlHaltParser(ctxt);
11715 return(0);
11716 }
11717 } else {
11718 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11719 }
11720 } else {
11721 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11722 if (ctxt->version == NULL) {
11723 xmlErrMemory(ctxt, NULL);
11724 break;
11725 }
11726 }
11727 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11728 ctxt->sax->setDocumentLocator(ctxt->userData,
11729 &xmlDefaultSAXLocator);
11730 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11731 (!ctxt->disableSAX))
11732 ctxt->sax->startDocument(ctxt->userData);
11733 if (ctxt->instate == XML_PARSER_EOF)
11734 goto done;
11735 ctxt->instate = XML_PARSER_MISC;
11736 break;
11737 case XML_PARSER_START_TAG: {
11738 const xmlChar *name;
11739 const xmlChar *prefix = NULL;
11740 const xmlChar *URI = NULL;
11741 int line = ctxt->input->line;
11742 int nbNs = 0;
11743
11744 if ((!terminate) && (avail < 2))
11745 goto done;
11746 cur = ctxt->input->cur[0];
11747 if (cur != '<') {
11748 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11749 "Start tag expected, '<' not found");
11750 xmlHaltParser(ctxt);
11751 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11752 ctxt->sax->endDocument(ctxt->userData);
11753 goto done;
11754 }
11755 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11756 goto done;
11757 if (ctxt->spaceNr == 0)
11758 spacePush(ctxt, -1);
11759 else if (*ctxt->space == -2)
11760 spacePush(ctxt, -1);
11761 else
11762 spacePush(ctxt, *ctxt->space);
11763 #ifdef LIBXML_SAX1_ENABLED
11764 if (ctxt->sax2)
11765 #endif /* LIBXML_SAX1_ENABLED */
11766 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11767 #ifdef LIBXML_SAX1_ENABLED
11768 else
11769 name = xmlParseStartTag(ctxt);
11770 #endif /* LIBXML_SAX1_ENABLED */
11771 if (ctxt->instate == XML_PARSER_EOF)
11772 goto done;
11773 if (name == NULL) {
11774 spacePop(ctxt);
11775 xmlHaltParser(ctxt);
11776 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777 ctxt->sax->endDocument(ctxt->userData);
11778 goto done;
11779 }
11780 #ifdef LIBXML_VALID_ENABLED
11781 /*
11782 * [ VC: Root Element Type ]
11783 * The Name in the document type declaration must match
11784 * the element type of the root element.
11785 */
11786 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11787 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11788 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11789 #endif /* LIBXML_VALID_ENABLED */
11790
11791 /*
11792 * Check for an Empty Element.
11793 */
11794 if ((RAW == '/') && (NXT(1) == '>')) {
11795 SKIP(2);
11796
11797 if (ctxt->sax2) {
11798 if ((ctxt->sax != NULL) &&
11799 (ctxt->sax->endElementNs != NULL) &&
11800 (!ctxt->disableSAX))
11801 ctxt->sax->endElementNs(ctxt->userData, name,
11802 prefix, URI);
11803 if (nbNs > 0)
11804 xmlParserNsPop(ctxt, nbNs);
11805 #ifdef LIBXML_SAX1_ENABLED
11806 } else {
11807 if ((ctxt->sax != NULL) &&
11808 (ctxt->sax->endElement != NULL) &&
11809 (!ctxt->disableSAX))
11810 ctxt->sax->endElement(ctxt->userData, name);
11811 #endif /* LIBXML_SAX1_ENABLED */
11812 }
11813 spacePop(ctxt);
11814 } else if (RAW == '>') {
11815 NEXT;
11816 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11817 } else {
11818 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11819 "Couldn't find end of Start Tag %s\n",
11820 name);
11821 nodePop(ctxt);
11822 spacePop(ctxt);
11823 if (nbNs > 0)
11824 xmlParserNsPop(ctxt, nbNs);
11825 }
11826
11827 if (ctxt->instate == XML_PARSER_EOF)
11828 goto done;
11829 if (ctxt->nameNr == 0)
11830 ctxt->instate = XML_PARSER_EPILOG;
11831 else
11832 ctxt->instate = XML_PARSER_CONTENT;
11833 break;
11834 }
11835 case XML_PARSER_CONTENT: {
11836 cur = ctxt->input->cur[0];
11837
11838 if (cur == '<') {
11839 if ((!terminate) && (avail < 2))
11840 goto done;
11841 next = ctxt->input->cur[1];
11842
11843 if (next == '/') {
11844 ctxt->instate = XML_PARSER_END_TAG;
11845 break;
11846 } else if (next == '?') {
11847 if ((!terminate) &&
11848 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11849 goto done;
11850 xmlParsePI(ctxt);
11851 if (ctxt->instate == XML_PARSER_EOF)
11852 goto done;
11853 ctxt->instate = XML_PARSER_CONTENT;
11854 break;
11855 } else if (next == '!') {
11856 if ((!terminate) && (avail < 3))
11857 goto done;
11858 next = ctxt->input->cur[2];
11859
11860 if (next == '-') {
11861 if ((!terminate) && (avail < 4))
11862 goto done;
11863 if (ctxt->input->cur[3] == '-') {
11864 if ((!terminate) &&
11865 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11866 goto done;
11867 xmlParseComment(ctxt);
11868 if (ctxt->instate == XML_PARSER_EOF)
11869 goto done;
11870 ctxt->instate = XML_PARSER_CONTENT;
11871 break;
11872 }
11873 } else if (next == '[') {
11874 if ((!terminate) && (avail < 9))
11875 goto done;
11876 if ((ctxt->input->cur[2] == '[') &&
11877 (ctxt->input->cur[3] == 'C') &&
11878 (ctxt->input->cur[4] == 'D') &&
11879 (ctxt->input->cur[5] == 'A') &&
11880 (ctxt->input->cur[6] == 'T') &&
11881 (ctxt->input->cur[7] == 'A') &&
11882 (ctxt->input->cur[8] == '[')) {
11883 SKIP(9);
11884 ctxt->instate = XML_PARSER_CDATA_SECTION;
11885 break;
11886 }
11887 }
11888 }
11889 } else if (cur == '&') {
11890 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11891 goto done;
11892 xmlParseReference(ctxt);
11893 break;
11894 } else {
11895 /* TODO Avoid the extra copy, handle directly !!! */
11896 /*
11897 * Goal of the following test is:
11898 * - minimize calls to the SAX 'character' callback
11899 * when they are mergeable
11900 * - handle an problem for isBlank when we only parse
11901 * a sequence of blank chars and the next one is
11902 * not available to check against '<' presence.
11903 * - tries to homogenize the differences in SAX
11904 * callbacks between the push and pull versions
11905 * of the parser.
11906 */
11907 if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11908 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11909 goto done;
11910 }
11911 ctxt->checkIndex = 0;
11912 xmlParseCharDataInternal(ctxt, !terminate);
11913 break;
11914 }
11915
11916 ctxt->instate = XML_PARSER_START_TAG;
11917 break;
11918 }
11919 case XML_PARSER_END_TAG:
11920 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11921 goto done;
11922 if (ctxt->sax2) {
11923 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11924 nameNsPop(ctxt);
11925 }
11926 #ifdef LIBXML_SAX1_ENABLED
11927 else
11928 xmlParseEndTag1(ctxt, 0);
11929 #endif /* LIBXML_SAX1_ENABLED */
11930 if (ctxt->instate == XML_PARSER_EOF)
11931 goto done;
11932 if (ctxt->nameNr == 0) {
11933 ctxt->instate = XML_PARSER_EPILOG;
11934 } else {
11935 ctxt->instate = XML_PARSER_CONTENT;
11936 }
11937 break;
11938 case XML_PARSER_CDATA_SECTION: {
11939 /*
11940 * The Push mode need to have the SAX callback for
11941 * cdataBlock merge back contiguous callbacks.
11942 */
11943 const xmlChar *term;
11944
11945 if (terminate) {
11946 /*
11947 * Don't call xmlParseLookupString. If 'terminate'
11948 * is set, checkIndex is invalid.
11949 */
11950 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11951 "]]>");
11952 } else {
11953 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11954 }
11955
11956 if (term == NULL) {
11957 int tmp, size;
11958
11959 if (terminate) {
11960 /* Unfinished CDATA section */
11961 size = ctxt->input->end - ctxt->input->cur;
11962 } else {
11963 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11964 goto done;
11965 ctxt->checkIndex = 0;
11966 /* XXX: Why don't we pass the full buffer? */
11967 size = XML_PARSER_BIG_BUFFER_SIZE;
11968 }
11969 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11970 if (tmp <= 0) {
11971 tmp = -tmp;
11972 ctxt->input->cur += tmp;
11973 goto encoding_error;
11974 }
11975 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11976 if (ctxt->sax->cdataBlock != NULL)
11977 ctxt->sax->cdataBlock(ctxt->userData,
11978 ctxt->input->cur, tmp);
11979 else if (ctxt->sax->characters != NULL)
11980 ctxt->sax->characters(ctxt->userData,
11981 ctxt->input->cur, tmp);
11982 }
11983 if (ctxt->instate == XML_PARSER_EOF)
11984 goto done;
11985 SKIPL(tmp);
11986 } else {
11987 int base = term - CUR_PTR;
11988 int tmp;
11989
11990 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11991 if ((tmp < 0) || (tmp != base)) {
11992 tmp = -tmp;
11993 ctxt->input->cur += tmp;
11994 goto encoding_error;
11995 }
11996 if ((ctxt->sax != NULL) && (base == 0) &&
11997 (ctxt->sax->cdataBlock != NULL) &&
11998 (!ctxt->disableSAX)) {
11999 /*
12000 * Special case to provide identical behaviour
12001 * between pull and push parsers on enpty CDATA
12002 * sections
12003 */
12004 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
12005 (!strncmp((const char *)&ctxt->input->cur[-9],
12006 "<![CDATA[", 9)))
12007 ctxt->sax->cdataBlock(ctxt->userData,
12008 BAD_CAST "", 0);
12009 } else if ((ctxt->sax != NULL) && (base > 0) &&
12010 (!ctxt->disableSAX)) {
12011 if (ctxt->sax->cdataBlock != NULL)
12012 ctxt->sax->cdataBlock(ctxt->userData,
12013 ctxt->input->cur, base);
12014 else if (ctxt->sax->characters != NULL)
12015 ctxt->sax->characters(ctxt->userData,
12016 ctxt->input->cur, base);
12017 }
12018 if (ctxt->instate == XML_PARSER_EOF)
12019 goto done;
12020 SKIPL(base + 3);
12021 ctxt->instate = XML_PARSER_CONTENT;
12022 }
12023 break;
12024 }
12025 case XML_PARSER_MISC:
12026 case XML_PARSER_PROLOG:
12027 case XML_PARSER_EPILOG:
12028 SKIP_BLANKS;
12029 avail = ctxt->input->end - ctxt->input->cur;
12030 if (avail < 1)
12031 goto done;
12032 if (ctxt->input->cur[0] == '<') {
12033 if ((!terminate) && (avail < 2))
12034 goto done;
12035 next = ctxt->input->cur[1];
12036 if (next == '?') {
12037 if ((!terminate) &&
12038 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
12039 goto done;
12040 xmlParsePI(ctxt);
12041 if (ctxt->instate == XML_PARSER_EOF)
12042 goto done;
12043 break;
12044 } else if (next == '!') {
12045 if ((!terminate) && (avail < 3))
12046 goto done;
12047
12048 if (ctxt->input->cur[2] == '-') {
12049 if ((!terminate) && (avail < 4))
12050 goto done;
12051 if (ctxt->input->cur[3] == '-') {
12052 if ((!terminate) &&
12053 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
12054 goto done;
12055 xmlParseComment(ctxt);
12056 if (ctxt->instate == XML_PARSER_EOF)
12057 goto done;
12058 break;
12059 }
12060 } else if (ctxt->instate == XML_PARSER_MISC) {
12061 if ((!terminate) && (avail < 9))
12062 goto done;
12063 if ((ctxt->input->cur[2] == 'D') &&
12064 (ctxt->input->cur[3] == 'O') &&
12065 (ctxt->input->cur[4] == 'C') &&
12066 (ctxt->input->cur[5] == 'T') &&
12067 (ctxt->input->cur[6] == 'Y') &&
12068 (ctxt->input->cur[7] == 'P') &&
12069 (ctxt->input->cur[8] == 'E')) {
12070 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
12071 goto done;
12072 ctxt->inSubset = 1;
12073 xmlParseDocTypeDecl(ctxt);
12074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
12076 if (RAW == '[') {
12077 ctxt->instate = XML_PARSER_DTD;
12078 } else {
12079 /*
12080 * Create and update the external subset.
12081 */
12082 ctxt->inSubset = 2;
12083 if ((ctxt->sax != NULL) &&
12084 (!ctxt->disableSAX) &&
12085 (ctxt->sax->externalSubset != NULL))
12086 ctxt->sax->externalSubset(
12087 ctxt->userData,
12088 ctxt->intSubName,
12089 ctxt->extSubSystem,
12090 ctxt->extSubURI);
12091 ctxt->inSubset = 0;
12092 xmlCleanSpecialAttr(ctxt);
12093 if (ctxt->instate == XML_PARSER_EOF)
12094 goto done;
12095 ctxt->instate = XML_PARSER_PROLOG;
12096 }
12097 break;
12098 }
12099 }
12100 }
12101 }
12102
12103 if (ctxt->instate == XML_PARSER_EPILOG) {
12104 if (ctxt->errNo == XML_ERR_OK)
12105 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12106 ctxt->instate = XML_PARSER_EOF;
12107 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12108 ctxt->sax->endDocument(ctxt->userData);
12109 } else {
12110 ctxt->instate = XML_PARSER_START_TAG;
12111 }
12112 break;
12113 case XML_PARSER_DTD: {
12114 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12115 goto done;
12116 xmlParseInternalSubset(ctxt);
12117 if (ctxt->instate == XML_PARSER_EOF)
12118 goto done;
12119 ctxt->inSubset = 2;
12120 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12121 (ctxt->sax->externalSubset != NULL))
12122 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12123 ctxt->extSubSystem, ctxt->extSubURI);
12124 ctxt->inSubset = 0;
12125 xmlCleanSpecialAttr(ctxt);
12126 if (ctxt->instate == XML_PARSER_EOF)
12127 goto done;
12128 ctxt->instate = XML_PARSER_PROLOG;
12129 break;
12130 }
12131 default:
12132 xmlGenericError(xmlGenericErrorContext,
12133 "PP: internal error\n");
12134 ctxt->instate = XML_PARSER_EOF;
12135 break;
12136 }
12137 }
12138 done:
12139 return(ret);
12140 encoding_error:
12141 if (ctxt->input->end - ctxt->input->cur < 4) {
12142 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12143 "Input is not proper UTF-8, indicate encoding !\n",
12144 NULL, NULL);
12145 } else {
12146 char buffer[150];
12147
12148 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12149 ctxt->input->cur[0], ctxt->input->cur[1],
12150 ctxt->input->cur[2], ctxt->input->cur[3]);
12151 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12152 "Input is not proper UTF-8, indicate encoding !\n%s",
12153 BAD_CAST buffer, NULL);
12154 }
12155 return(0);
12156 }
12157
12158 /**
12159 * xmlParseChunk:
12160 * @ctxt: an XML parser context
12161 * @chunk: an char array
12162 * @size: the size in byte of the chunk
12163 * @terminate: last chunk indicator
12164 *
12165 * Parse a Chunk of memory
12166 *
12167 * Returns zero if no error, the xmlParserErrors otherwise.
12168 */
12169 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)12170 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12171 int terminate) {
12172 int end_in_lf = 0;
12173
12174 if (ctxt == NULL)
12175 return(XML_ERR_INTERNAL_ERROR);
12176 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12177 return(ctxt->errNo);
12178 if (ctxt->instate == XML_PARSER_EOF)
12179 return(-1);
12180 if (ctxt->input == NULL)
12181 return(-1);
12182
12183 ctxt->progressive = 1;
12184 if (ctxt->instate == XML_PARSER_START)
12185 xmlDetectSAX2(ctxt);
12186 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12187 (chunk[size - 1] == '\r')) {
12188 end_in_lf = 1;
12189 size--;
12190 }
12191
12192 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12193 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12194 size_t pos = ctxt->input->cur - ctxt->input->base;
12195 int res;
12196
12197 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12198 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12199 if (res < 0) {
12200 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12201 xmlHaltParser(ctxt);
12202 return(ctxt->errNo);
12203 }
12204 }
12205
12206 xmlParseTryOrFinish(ctxt, terminate);
12207 if (ctxt->instate == XML_PARSER_EOF)
12208 return(ctxt->errNo);
12209
12210 if ((ctxt->input != NULL) &&
12211 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12212 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12213 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12214 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12215 xmlHaltParser(ctxt);
12216 }
12217 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12218 return(ctxt->errNo);
12219
12220 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12221 (ctxt->input->buf != NULL)) {
12222 size_t pos = ctxt->input->cur - ctxt->input->base;
12223 int res;
12224
12225 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12226 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12227 if (res < 0) {
12228 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12229 xmlHaltParser(ctxt);
12230 return(ctxt->errNo);
12231 }
12232 }
12233 if (terminate) {
12234 /*
12235 * Check for termination
12236 */
12237 if ((ctxt->instate != XML_PARSER_EOF) &&
12238 (ctxt->instate != XML_PARSER_EPILOG)) {
12239 if (ctxt->nameNr > 0) {
12240 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
12241 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
12242 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
12243 "Premature end of data in tag %s line %d\n",
12244 name, line, NULL);
12245 } else if (ctxt->instate == XML_PARSER_START) {
12246 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
12247 } else {
12248 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
12249 "Start tag expected, '<' not found\n");
12250 }
12251 } else if ((ctxt->input->buf != NULL) &&
12252 (ctxt->input->buf->encoder != NULL) &&
12253 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
12254 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
12255 "Truncated multi-byte sequence at EOF\n");
12256 }
12257 if (ctxt->instate != XML_PARSER_EOF) {
12258 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12259 ctxt->sax->endDocument(ctxt->userData);
12260 }
12261 ctxt->instate = XML_PARSER_EOF;
12262 }
12263 if (ctxt->wellFormed == 0)
12264 return((xmlParserErrors) ctxt->errNo);
12265 else
12266 return(0);
12267 }
12268
12269 /************************************************************************
12270 * *
12271 * I/O front end functions to the parser *
12272 * *
12273 ************************************************************************/
12274
12275 /**
12276 * xmlCreatePushParserCtxt:
12277 * @sax: a SAX handler
12278 * @user_data: The user data returned on SAX callbacks
12279 * @chunk: a pointer to an array of chars
12280 * @size: number of chars in the array
12281 * @filename: an optional file name or URI
12282 *
12283 * Create a parser context for using the XML parser in push mode.
12284 * If @buffer and @size are non-NULL, the data is used to detect
12285 * the encoding. The remaining characters will be parsed so they
12286 * don't need to be fed in again through xmlParseChunk.
12287 * To allow content encoding detection, @size should be >= 4
12288 * The value of @filename is used for fetching external entities
12289 * and error/warning reports.
12290 *
12291 * Returns the new parser context or NULL
12292 */
12293
12294 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)12295 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12296 const char *chunk, int size, const char *filename) {
12297 xmlParserCtxtPtr ctxt;
12298 xmlParserInputPtr inputStream;
12299 xmlParserInputBufferPtr buf;
12300
12301 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12302 if (buf == NULL) return(NULL);
12303
12304 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12305 if (ctxt == NULL) {
12306 xmlErrMemory(NULL, "creating parser: out of memory\n");
12307 xmlFreeParserInputBuffer(buf);
12308 return(NULL);
12309 }
12310 ctxt->dictNames = 1;
12311 if (filename == NULL) {
12312 ctxt->directory = NULL;
12313 } else {
12314 ctxt->directory = xmlParserGetDirectory(filename);
12315 }
12316
12317 inputStream = xmlNewInputStream(ctxt);
12318 if (inputStream == NULL) {
12319 xmlFreeParserCtxt(ctxt);
12320 xmlFreeParserInputBuffer(buf);
12321 return(NULL);
12322 }
12323
12324 if (filename == NULL)
12325 inputStream->filename = NULL;
12326 else {
12327 inputStream->filename = (char *)
12328 xmlCanonicPath((const xmlChar *) filename);
12329 if (inputStream->filename == NULL) {
12330 xmlFreeInputStream(inputStream);
12331 xmlFreeParserCtxt(ctxt);
12332 xmlFreeParserInputBuffer(buf);
12333 return(NULL);
12334 }
12335 }
12336 inputStream->buf = buf;
12337 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12338 inputPush(ctxt, inputStream);
12339
12340 if ((size != 0) && (chunk != NULL) &&
12341 (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12342 size_t pos = ctxt->input->cur - ctxt->input->base;
12343 int res;
12344
12345 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12346 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12347 if (res < 0) {
12348 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12349 xmlHaltParser(ctxt);
12350 }
12351 }
12352
12353 return(ctxt);
12354 }
12355 #endif /* LIBXML_PUSH_ENABLED */
12356
12357 /**
12358 * xmlStopParser:
12359 * @ctxt: an XML parser context
12360 *
12361 * Blocks further parser processing
12362 */
12363 void
xmlStopParser(xmlParserCtxtPtr ctxt)12364 xmlStopParser(xmlParserCtxtPtr ctxt) {
12365 if (ctxt == NULL)
12366 return;
12367 xmlHaltParser(ctxt);
12368 ctxt->errNo = XML_ERR_USER_STOP;
12369 }
12370
12371 /**
12372 * xmlCreateIOParserCtxt:
12373 * @sax: a SAX handler
12374 * @user_data: The user data returned on SAX callbacks
12375 * @ioread: an I/O read function
12376 * @ioclose: an I/O close function
12377 * @ioctx: an I/O handler
12378 * @enc: the charset encoding if known
12379 *
12380 * Create a parser context for using the XML parser with an existing
12381 * I/O stream
12382 *
12383 * Returns the new parser context or NULL
12384 */
12385 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)12386 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12387 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12388 void *ioctx, xmlCharEncoding enc) {
12389 xmlParserCtxtPtr ctxt;
12390 xmlParserInputPtr inputStream;
12391 xmlParserInputBufferPtr buf;
12392
12393 if (ioread == NULL) return(NULL);
12394
12395 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12396 if (buf == NULL) {
12397 if (ioclose != NULL)
12398 ioclose(ioctx);
12399 return (NULL);
12400 }
12401
12402 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12403 if (ctxt == NULL) {
12404 xmlFreeParserInputBuffer(buf);
12405 return(NULL);
12406 }
12407
12408 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12409 if (inputStream == NULL) {
12410 xmlFreeParserCtxt(ctxt);
12411 return(NULL);
12412 }
12413 inputPush(ctxt, inputStream);
12414
12415 return(ctxt);
12416 }
12417
12418 #ifdef LIBXML_VALID_ENABLED
12419 /************************************************************************
12420 * *
12421 * Front ends when parsing a DTD *
12422 * *
12423 ************************************************************************/
12424
12425 /**
12426 * xmlIOParseDTD:
12427 * @sax: the SAX handler block or NULL
12428 * @input: an Input Buffer
12429 * @enc: the charset encoding if known
12430 *
12431 * Load and parse a DTD
12432 *
12433 * Returns the resulting xmlDtdPtr or NULL in case of error.
12434 * @input will be freed by the function in any case.
12435 */
12436
12437 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12438 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12439 xmlCharEncoding enc) {
12440 xmlDtdPtr ret = NULL;
12441 xmlParserCtxtPtr ctxt;
12442 xmlParserInputPtr pinput = NULL;
12443
12444 if (input == NULL)
12445 return(NULL);
12446
12447 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12448 if (ctxt == NULL) {
12449 xmlFreeParserInputBuffer(input);
12450 return(NULL);
12451 }
12452
12453 /* We are loading a DTD */
12454 ctxt->options |= XML_PARSE_DTDLOAD;
12455
12456 xmlDetectSAX2(ctxt);
12457
12458 /*
12459 * generate a parser input from the I/O handler
12460 */
12461
12462 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12463 if (pinput == NULL) {
12464 xmlFreeParserInputBuffer(input);
12465 xmlFreeParserCtxt(ctxt);
12466 return(NULL);
12467 }
12468
12469 /*
12470 * plug some encoding conversion routines here.
12471 */
12472 if (xmlPushInput(ctxt, pinput) < 0) {
12473 xmlFreeParserCtxt(ctxt);
12474 return(NULL);
12475 }
12476 if (enc != XML_CHAR_ENCODING_NONE) {
12477 xmlSwitchEncoding(ctxt, enc);
12478 }
12479
12480 /*
12481 * let's parse that entity knowing it's an external subset.
12482 */
12483 ctxt->inSubset = 2;
12484 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12485 if (ctxt->myDoc == NULL) {
12486 xmlErrMemory(ctxt, "New Doc failed");
12487 return(NULL);
12488 }
12489 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12490 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12491 BAD_CAST "none", BAD_CAST "none");
12492
12493 xmlDetectEncoding(ctxt);
12494
12495 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12496
12497 if (ctxt->myDoc != NULL) {
12498 if (ctxt->wellFormed) {
12499 ret = ctxt->myDoc->extSubset;
12500 ctxt->myDoc->extSubset = NULL;
12501 if (ret != NULL) {
12502 xmlNodePtr tmp;
12503
12504 ret->doc = NULL;
12505 tmp = ret->children;
12506 while (tmp != NULL) {
12507 tmp->doc = NULL;
12508 tmp = tmp->next;
12509 }
12510 }
12511 } else {
12512 ret = NULL;
12513 }
12514 xmlFreeDoc(ctxt->myDoc);
12515 ctxt->myDoc = NULL;
12516 }
12517 xmlFreeParserCtxt(ctxt);
12518
12519 return(ret);
12520 }
12521
12522 /**
12523 * xmlSAXParseDTD:
12524 * @sax: the SAX handler block
12525 * @ExternalID: a NAME* containing the External ID of the DTD
12526 * @SystemID: a NAME* containing the URL to the DTD
12527 *
12528 * DEPRECATED: Don't use.
12529 *
12530 * Load and parse an external subset.
12531 *
12532 * Returns the resulting xmlDtdPtr or NULL in case of error.
12533 */
12534
12535 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12536 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12537 const xmlChar *SystemID) {
12538 xmlDtdPtr ret = NULL;
12539 xmlParserCtxtPtr ctxt;
12540 xmlParserInputPtr input = NULL;
12541 xmlChar* systemIdCanonic;
12542
12543 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12544
12545 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12546 if (ctxt == NULL) {
12547 return(NULL);
12548 }
12549
12550 /* We are loading a DTD */
12551 ctxt->options |= XML_PARSE_DTDLOAD;
12552
12553 /*
12554 * Canonicalise the system ID
12555 */
12556 systemIdCanonic = xmlCanonicPath(SystemID);
12557 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12558 xmlFreeParserCtxt(ctxt);
12559 return(NULL);
12560 }
12561
12562 /*
12563 * Ask the Entity resolver to load the damn thing
12564 */
12565
12566 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12567 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12568 systemIdCanonic);
12569 if (input == NULL) {
12570 xmlFreeParserCtxt(ctxt);
12571 if (systemIdCanonic != NULL)
12572 xmlFree(systemIdCanonic);
12573 return(NULL);
12574 }
12575
12576 /*
12577 * plug some encoding conversion routines here.
12578 */
12579 if (xmlPushInput(ctxt, input) < 0) {
12580 xmlFreeParserCtxt(ctxt);
12581 if (systemIdCanonic != NULL)
12582 xmlFree(systemIdCanonic);
12583 return(NULL);
12584 }
12585
12586 xmlDetectEncoding(ctxt);
12587
12588 if (input->filename == NULL)
12589 input->filename = (char *) systemIdCanonic;
12590 else
12591 xmlFree(systemIdCanonic);
12592
12593 /*
12594 * let's parse that entity knowing it's an external subset.
12595 */
12596 ctxt->inSubset = 2;
12597 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12598 if (ctxt->myDoc == NULL) {
12599 xmlErrMemory(ctxt, "New Doc failed");
12600 xmlFreeParserCtxt(ctxt);
12601 return(NULL);
12602 }
12603 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12604 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12605 ExternalID, SystemID);
12606 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12607
12608 if (ctxt->myDoc != NULL) {
12609 if (ctxt->wellFormed) {
12610 ret = ctxt->myDoc->extSubset;
12611 ctxt->myDoc->extSubset = NULL;
12612 if (ret != NULL) {
12613 xmlNodePtr tmp;
12614
12615 ret->doc = NULL;
12616 tmp = ret->children;
12617 while (tmp != NULL) {
12618 tmp->doc = NULL;
12619 tmp = tmp->next;
12620 }
12621 }
12622 } else {
12623 ret = NULL;
12624 }
12625 xmlFreeDoc(ctxt->myDoc);
12626 ctxt->myDoc = NULL;
12627 }
12628 xmlFreeParserCtxt(ctxt);
12629
12630 return(ret);
12631 }
12632
12633
12634 /**
12635 * xmlParseDTD:
12636 * @ExternalID: a NAME* containing the External ID of the DTD
12637 * @SystemID: a NAME* containing the URL to the DTD
12638 *
12639 * Load and parse an external subset.
12640 *
12641 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642 */
12643
12644 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12645 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12646 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12647 }
12648 #endif /* LIBXML_VALID_ENABLED */
12649
12650 /************************************************************************
12651 * *
12652 * Front ends when parsing an Entity *
12653 * *
12654 ************************************************************************/
12655
12656 /**
12657 * xmlParseCtxtExternalEntity:
12658 * @ctx: the existing parsing context
12659 * @URL: the URL for the entity to load
12660 * @ID: the System ID for the entity to load
12661 * @lst: the return value for the set of parsed nodes
12662 *
12663 * Parse an external general entity within an existing parsing context
12664 * An external general parsed entity is well-formed if it matches the
12665 * production labeled extParsedEnt.
12666 *
12667 * [78] extParsedEnt ::= TextDecl? content
12668 *
12669 * Returns 0 if the entity is well formed, -1 in case of args problem and
12670 * the parser error code otherwise
12671 */
12672
12673 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12674 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12675 const xmlChar *ID, xmlNodePtr *lst) {
12676 void *userData;
12677
12678 if (ctx == NULL) return(-1);
12679 /*
12680 * If the user provided their own SAX callbacks, then reuse the
12681 * userData callback field, otherwise the expected setup in a
12682 * DOM builder is to have userData == ctxt
12683 */
12684 if (ctx->userData == ctx)
12685 userData = NULL;
12686 else
12687 userData = ctx->userData;
12688 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12689 userData, ctx->depth + 1,
12690 URL, ID, lst);
12691 }
12692
12693 /**
12694 * xmlParseExternalEntityPrivate:
12695 * @doc: the document the chunk pertains to
12696 * @oldctxt: the previous parser context if available
12697 * @sax: the SAX handler block (possibly NULL)
12698 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12699 * @depth: Used for loop detection, use 0
12700 * @URL: the URL for the entity to load
12701 * @ID: the System ID for the entity to load
12702 * @list: the return value for the set of parsed nodes
12703 *
12704 * Private version of xmlParseExternalEntity()
12705 *
12706 * Returns 0 if the entity is well formed, -1 in case of args problem and
12707 * the parser error code otherwise
12708 */
12709
12710 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12711 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12712 xmlSAXHandlerPtr sax,
12713 void *user_data, int depth, const xmlChar *URL,
12714 const xmlChar *ID, xmlNodePtr *list) {
12715 xmlParserCtxtPtr ctxt;
12716 xmlDocPtr newDoc;
12717 xmlNodePtr newRoot;
12718 xmlParserErrors ret = XML_ERR_OK;
12719
12720 if (((depth > 40) &&
12721 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12722 (depth > 100)) {
12723 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12724 "Maximum entity nesting depth exceeded");
12725 return(XML_ERR_ENTITY_LOOP);
12726 }
12727
12728 if (list != NULL)
12729 *list = NULL;
12730 if ((URL == NULL) && (ID == NULL))
12731 return(XML_ERR_INTERNAL_ERROR);
12732 if (doc == NULL)
12733 return(XML_ERR_INTERNAL_ERROR);
12734
12735 ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12736 oldctxt);
12737 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12738 if (oldctxt != NULL) {
12739 ctxt->nbErrors = oldctxt->nbErrors;
12740 ctxt->nbWarnings = oldctxt->nbWarnings;
12741 }
12742 xmlDetectSAX2(ctxt);
12743
12744 newDoc = xmlNewDoc(BAD_CAST "1.0");
12745 if (newDoc == NULL) {
12746 xmlFreeParserCtxt(ctxt);
12747 return(XML_ERR_INTERNAL_ERROR);
12748 }
12749 newDoc->properties = XML_DOC_INTERNAL;
12750 if (doc) {
12751 newDoc->intSubset = doc->intSubset;
12752 newDoc->extSubset = doc->extSubset;
12753 if (doc->dict) {
12754 newDoc->dict = doc->dict;
12755 xmlDictReference(newDoc->dict);
12756 }
12757 if (doc->URL != NULL) {
12758 newDoc->URL = xmlStrdup(doc->URL);
12759 }
12760 }
12761 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12762 if (newRoot == NULL) {
12763 if (sax != NULL)
12764 xmlFreeParserCtxt(ctxt);
12765 newDoc->intSubset = NULL;
12766 newDoc->extSubset = NULL;
12767 xmlFreeDoc(newDoc);
12768 return(XML_ERR_INTERNAL_ERROR);
12769 }
12770 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12771 nodePush(ctxt, newDoc->children);
12772 if (doc == NULL) {
12773 ctxt->myDoc = newDoc;
12774 } else {
12775 ctxt->myDoc = doc;
12776 newRoot->doc = doc;
12777 }
12778
12779 xmlDetectEncoding(ctxt);
12780
12781 /*
12782 * Parse a possible text declaration first
12783 */
12784 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12785 xmlParseTextDecl(ctxt);
12786 /*
12787 * An XML-1.0 document can't reference an entity not XML-1.0
12788 */
12789 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12790 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12791 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12792 "Version mismatch between document and entity\n");
12793 }
12794 }
12795
12796 ctxt->instate = XML_PARSER_CONTENT;
12797 ctxt->depth = depth;
12798 if (oldctxt != NULL) {
12799 ctxt->_private = oldctxt->_private;
12800 ctxt->loadsubset = oldctxt->loadsubset;
12801 ctxt->validate = oldctxt->validate;
12802 ctxt->valid = oldctxt->valid;
12803 ctxt->replaceEntities = oldctxt->replaceEntities;
12804 if (oldctxt->validate) {
12805 ctxt->vctxt.error = oldctxt->vctxt.error;
12806 ctxt->vctxt.warning = oldctxt->vctxt.warning;
12807 ctxt->vctxt.userData = oldctxt->vctxt.userData;
12808 ctxt->vctxt.flags = oldctxt->vctxt.flags;
12809 }
12810 ctxt->external = oldctxt->external;
12811 if (ctxt->dict) xmlDictFree(ctxt->dict);
12812 ctxt->dict = oldctxt->dict;
12813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12816 ctxt->dictNames = oldctxt->dictNames;
12817 ctxt->attsDefault = oldctxt->attsDefault;
12818 ctxt->attsSpecial = oldctxt->attsSpecial;
12819 ctxt->linenumbers = oldctxt->linenumbers;
12820 ctxt->record_info = oldctxt->record_info;
12821 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12822 ctxt->node_seq.length = oldctxt->node_seq.length;
12823 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12824 } else {
12825 /*
12826 * Doing validity checking on chunk without context
12827 * doesn't make sense
12828 */
12829 ctxt->_private = NULL;
12830 ctxt->validate = 0;
12831 ctxt->external = 2;
12832 ctxt->loadsubset = 0;
12833 }
12834
12835 xmlParseContent(ctxt);
12836
12837 if ((RAW == '<') && (NXT(1) == '/')) {
12838 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12839 } else if (RAW != 0) {
12840 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12841 }
12842 if (ctxt->node != newDoc->children) {
12843 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12844 }
12845
12846 if (!ctxt->wellFormed) {
12847 ret = (xmlParserErrors)ctxt->errNo;
12848 if (oldctxt != NULL) {
12849 oldctxt->errNo = ctxt->errNo;
12850 oldctxt->wellFormed = 0;
12851 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12852 }
12853 } else {
12854 if (list != NULL) {
12855 xmlNodePtr cur;
12856
12857 /*
12858 * Return the newly created nodeset after unlinking it from
12859 * they pseudo parent.
12860 */
12861 cur = newDoc->children->children;
12862 *list = cur;
12863 while (cur != NULL) {
12864 cur->parent = NULL;
12865 cur = cur->next;
12866 }
12867 newDoc->children->children = NULL;
12868 }
12869 ret = XML_ERR_OK;
12870 }
12871
12872 /*
12873 * Also record the size of the entity parsed
12874 */
12875 if (ctxt->input != NULL && oldctxt != NULL) {
12876 unsigned long consumed = ctxt->input->consumed;
12877
12878 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12879
12880 xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12881 xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12882
12883 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12884 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12885 }
12886
12887 if (oldctxt != NULL) {
12888 ctxt->dict = NULL;
12889 ctxt->attsDefault = NULL;
12890 ctxt->attsSpecial = NULL;
12891 oldctxt->nbErrors = ctxt->nbErrors;
12892 oldctxt->nbWarnings = ctxt->nbWarnings;
12893 oldctxt->validate = ctxt->validate;
12894 oldctxt->valid = ctxt->valid;
12895 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12896 oldctxt->node_seq.length = ctxt->node_seq.length;
12897 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12898 }
12899 ctxt->node_seq.maximum = 0;
12900 ctxt->node_seq.length = 0;
12901 ctxt->node_seq.buffer = NULL;
12902 xmlFreeParserCtxt(ctxt);
12903 newDoc->intSubset = NULL;
12904 newDoc->extSubset = NULL;
12905 xmlFreeDoc(newDoc);
12906
12907 return(ret);
12908 }
12909
12910 #ifdef LIBXML_SAX1_ENABLED
12911 /**
12912 * xmlParseExternalEntity:
12913 * @doc: the document the chunk pertains to
12914 * @sax: the SAX handler block (possibly NULL)
12915 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12916 * @depth: Used for loop detection, use 0
12917 * @URL: the URL for the entity to load
12918 * @ID: the System ID for the entity to load
12919 * @lst: the return value for the set of parsed nodes
12920 *
12921 * Parse an external general entity
12922 * An external general parsed entity is well-formed if it matches the
12923 * production labeled extParsedEnt.
12924 *
12925 * [78] extParsedEnt ::= TextDecl? content
12926 *
12927 * Returns 0 if the entity is well formed, -1 in case of args problem and
12928 * the parser error code otherwise
12929 */
12930
12931 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12932 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12933 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12934 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12935 ID, lst));
12936 }
12937
12938 /**
12939 * xmlParseBalancedChunkMemory:
12940 * @doc: the document the chunk pertains to (must not be NULL)
12941 * @sax: the SAX handler block (possibly NULL)
12942 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12943 * @depth: Used for loop detection, use 0
12944 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12945 * @lst: the return value for the set of parsed nodes
12946 *
12947 * Parse a well-balanced chunk of an XML document
12948 * called by the parser
12949 * The allowed sequence for the Well Balanced Chunk is the one defined by
12950 * the content production in the XML grammar:
12951 *
12952 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12953 *
12954 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12955 * the parser error code otherwise
12956 */
12957
12958 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12959 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12960 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12961 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12962 depth, string, lst, 0 );
12963 }
12964 #endif /* LIBXML_SAX1_ENABLED */
12965
12966 /**
12967 * xmlParseBalancedChunkMemoryInternal:
12968 * @oldctxt: the existing parsing context
12969 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12970 * @user_data: the user data field for the parser context
12971 * @lst: the return value for the set of parsed nodes
12972 *
12973 *
12974 * Parse a well-balanced chunk of an XML document
12975 * called by the parser
12976 * The allowed sequence for the Well Balanced Chunk is the one defined by
12977 * the content production in the XML grammar:
12978 *
12979 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12980 *
12981 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12982 * error code otherwise
12983 *
12984 * In case recover is set to 1, the nodelist will not be empty even if
12985 * the parsed chunk is not well balanced.
12986 */
12987 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12988 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12989 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12990 xmlParserCtxtPtr ctxt;
12991 xmlDocPtr newDoc = NULL;
12992 xmlNodePtr newRoot;
12993 xmlSAXHandlerPtr oldsax = NULL;
12994 xmlNodePtr content = NULL;
12995 xmlNodePtr last = NULL;
12996 xmlParserErrors ret = XML_ERR_OK;
12997 xmlHashedString hprefix, huri;
12998 unsigned i;
12999
13000 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13001 (oldctxt->depth > 100)) {
13002 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13003 "Maximum entity nesting depth exceeded");
13004 return(XML_ERR_ENTITY_LOOP);
13005 }
13006
13007
13008 if (lst != NULL)
13009 *lst = NULL;
13010 if (string == NULL)
13011 return(XML_ERR_INTERNAL_ERROR);
13012
13013 ctxt = xmlCreateDocParserCtxt(string);
13014 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13015 ctxt->nbErrors = oldctxt->nbErrors;
13016 ctxt->nbWarnings = oldctxt->nbWarnings;
13017 if (user_data != NULL)
13018 ctxt->userData = user_data;
13019 else
13020 ctxt->userData = ctxt;
13021 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13022 ctxt->dict = oldctxt->dict;
13023 ctxt->input_id = oldctxt->input_id;
13024 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13025 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13026 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13027
13028 /*
13029 * Propagate namespaces down the entity
13030 *
13031 * Making entities and namespaces work correctly requires additional
13032 * changes, see xmlParseReference.
13033 */
13034
13035 /* Default namespace */
13036 hprefix.name = NULL;
13037 hprefix.hashValue = 0;
13038 huri.name = xmlParserNsLookupUri(oldctxt, &hprefix);
13039 huri.hashValue = 0;
13040 if (huri.name != NULL)
13041 xmlParserNsPush(ctxt, NULL, &huri, NULL, 0);
13042
13043 for (i = 0; i < oldctxt->nsdb->hashSize; i++) {
13044 xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i];
13045 const xmlChar **ns;
13046 xmlParserNsExtra *extra;
13047 unsigned nsIndex;
13048
13049 if ((bucket->hashValue != 0) &&
13050 (bucket->index != INT_MAX)) {
13051 nsIndex = bucket->index;
13052 ns = &oldctxt->nsTab[nsIndex * 2];
13053 extra = &oldctxt->nsdb->extra[nsIndex];
13054
13055 hprefix.name = ns[0];
13056 hprefix.hashValue = bucket->hashValue;
13057 huri.name = ns[1];
13058 huri.hashValue = extra->uriHashValue;
13059 /*
13060 * Don't copy SAX data to avoid a use-after-free with XML reader.
13061 * This matches the pre-2.12 behavior.
13062 */
13063 xmlParserNsPush(ctxt, &hprefix, &huri, NULL, 0);
13064 }
13065 }
13066
13067 oldsax = ctxt->sax;
13068 ctxt->sax = oldctxt->sax;
13069 xmlDetectSAX2(ctxt);
13070 ctxt->replaceEntities = oldctxt->replaceEntities;
13071 ctxt->options = oldctxt->options;
13072
13073 ctxt->_private = oldctxt->_private;
13074 if (oldctxt->myDoc == NULL) {
13075 newDoc = xmlNewDoc(BAD_CAST "1.0");
13076 if (newDoc == NULL) {
13077 ret = XML_ERR_INTERNAL_ERROR;
13078 goto error;
13079 }
13080 newDoc->properties = XML_DOC_INTERNAL;
13081 newDoc->dict = ctxt->dict;
13082 xmlDictReference(newDoc->dict);
13083 ctxt->myDoc = newDoc;
13084 } else {
13085 ctxt->myDoc = oldctxt->myDoc;
13086 content = ctxt->myDoc->children;
13087 last = ctxt->myDoc->last;
13088 }
13089 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13090 if (newRoot == NULL) {
13091 ret = XML_ERR_INTERNAL_ERROR;
13092 goto error;
13093 }
13094 ctxt->myDoc->children = NULL;
13095 ctxt->myDoc->last = NULL;
13096 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13097 nodePush(ctxt, ctxt->myDoc->children);
13098 ctxt->instate = XML_PARSER_CONTENT;
13099 ctxt->depth = oldctxt->depth;
13100
13101 ctxt->validate = 0;
13102 ctxt->loadsubset = oldctxt->loadsubset;
13103 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13104 /*
13105 * ID/IDREF registration will be done in xmlValidateElement below
13106 */
13107 ctxt->loadsubset |= XML_SKIP_IDS;
13108 }
13109 ctxt->dictNames = oldctxt->dictNames;
13110 ctxt->attsDefault = oldctxt->attsDefault;
13111 ctxt->attsSpecial = oldctxt->attsSpecial;
13112
13113 xmlParseContent(ctxt);
13114 if ((RAW == '<') && (NXT(1) == '/')) {
13115 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13116 } else if (RAW != 0) {
13117 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13118 }
13119 if (ctxt->node != ctxt->myDoc->children) {
13120 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13121 }
13122
13123 if (!ctxt->wellFormed) {
13124 ret = (xmlParserErrors)ctxt->errNo;
13125 oldctxt->errNo = ctxt->errNo;
13126 oldctxt->wellFormed = 0;
13127 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13128 } else {
13129 ret = XML_ERR_OK;
13130 }
13131
13132 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13133 xmlNodePtr cur;
13134
13135 /*
13136 * Return the newly created nodeset after unlinking it from
13137 * they pseudo parent.
13138 */
13139 cur = ctxt->myDoc->children->children;
13140 *lst = cur;
13141 while (cur != NULL) {
13142 #ifdef LIBXML_VALID_ENABLED
13143 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13144 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13145 (cur->type == XML_ELEMENT_NODE)) {
13146 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13147 oldctxt->myDoc, cur);
13148 }
13149 #endif /* LIBXML_VALID_ENABLED */
13150 cur->parent = NULL;
13151 cur = cur->next;
13152 }
13153 ctxt->myDoc->children->children = NULL;
13154 }
13155 if (ctxt->myDoc != NULL) {
13156 xmlFreeNode(ctxt->myDoc->children);
13157 ctxt->myDoc->children = content;
13158 ctxt->myDoc->last = last;
13159 }
13160
13161 /*
13162 * Also record the size of the entity parsed
13163 */
13164 if (ctxt->input != NULL && oldctxt != NULL) {
13165 unsigned long consumed = ctxt->input->consumed;
13166
13167 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13168
13169 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13170 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13171 }
13172
13173 oldctxt->nbErrors = ctxt->nbErrors;
13174 oldctxt->nbWarnings = ctxt->nbWarnings;
13175
13176 error:
13177 ctxt->sax = oldsax;
13178 ctxt->dict = NULL;
13179 ctxt->attsDefault = NULL;
13180 ctxt->attsSpecial = NULL;
13181 xmlFreeParserCtxt(ctxt);
13182 if (newDoc != NULL) {
13183 xmlFreeDoc(newDoc);
13184 }
13185
13186 return(ret);
13187 }
13188
13189 /**
13190 * xmlParseInNodeContext:
13191 * @node: the context node
13192 * @data: the input string
13193 * @datalen: the input string length in bytes
13194 * @options: a combination of xmlParserOption
13195 * @lst: the return value for the set of parsed nodes
13196 *
13197 * Parse a well-balanced chunk of an XML document
13198 * within the context (DTD, namespaces, etc ...) of the given node.
13199 *
13200 * The allowed sequence for the data is a Well Balanced Chunk defined by
13201 * the content production in the XML grammar:
13202 *
13203 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13204 *
13205 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13206 * error code otherwise
13207 */
13208 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)13209 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13210 int options, xmlNodePtr *lst) {
13211 xmlParserCtxtPtr ctxt;
13212 xmlDocPtr doc = NULL;
13213 xmlNodePtr fake, cur;
13214 int nsnr = 0;
13215
13216 xmlParserErrors ret = XML_ERR_OK;
13217
13218 /*
13219 * check all input parameters, grab the document
13220 */
13221 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13222 return(XML_ERR_INTERNAL_ERROR);
13223 switch (node->type) {
13224 case XML_ELEMENT_NODE:
13225 case XML_ATTRIBUTE_NODE:
13226 case XML_TEXT_NODE:
13227 case XML_CDATA_SECTION_NODE:
13228 case XML_ENTITY_REF_NODE:
13229 case XML_PI_NODE:
13230 case XML_COMMENT_NODE:
13231 case XML_DOCUMENT_NODE:
13232 case XML_HTML_DOCUMENT_NODE:
13233 break;
13234 default:
13235 return(XML_ERR_INTERNAL_ERROR);
13236
13237 }
13238 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13239 (node->type != XML_DOCUMENT_NODE) &&
13240 (node->type != XML_HTML_DOCUMENT_NODE))
13241 node = node->parent;
13242 if (node == NULL)
13243 return(XML_ERR_INTERNAL_ERROR);
13244 if (node->type == XML_ELEMENT_NODE)
13245 doc = node->doc;
13246 else
13247 doc = (xmlDocPtr) node;
13248 if (doc == NULL)
13249 return(XML_ERR_INTERNAL_ERROR);
13250
13251 /*
13252 * allocate a context and set-up everything not related to the
13253 * node position in the tree
13254 */
13255 if (doc->type == XML_DOCUMENT_NODE)
13256 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13257 #ifdef LIBXML_HTML_ENABLED
13258 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13259 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13260 /*
13261 * When parsing in context, it makes no sense to add implied
13262 * elements like html/body/etc...
13263 */
13264 options |= HTML_PARSE_NOIMPLIED;
13265 }
13266 #endif
13267 else
13268 return(XML_ERR_INTERNAL_ERROR);
13269
13270 if (ctxt == NULL)
13271 return(XML_ERR_NO_MEMORY);
13272
13273 /*
13274 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13275 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13276 * we must wait until the last moment to free the original one.
13277 */
13278 if (doc->dict != NULL) {
13279 if (ctxt->dict != NULL)
13280 xmlDictFree(ctxt->dict);
13281 ctxt->dict = doc->dict;
13282 } else
13283 options |= XML_PARSE_NODICT;
13284
13285 if (doc->encoding != NULL) {
13286 xmlCharEncodingHandlerPtr hdlr;
13287
13288 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13289 if (hdlr != NULL) {
13290 xmlSwitchToEncoding(ctxt, hdlr);
13291 } else {
13292 return(XML_ERR_UNSUPPORTED_ENCODING);
13293 }
13294 }
13295
13296 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13297 xmlDetectSAX2(ctxt);
13298 ctxt->myDoc = doc;
13299 /* parsing in context, i.e. as within existing content */
13300 ctxt->input_id = 2;
13301 ctxt->instate = XML_PARSER_CONTENT;
13302
13303 fake = xmlNewDocComment(node->doc, NULL);
13304 if (fake == NULL) {
13305 xmlFreeParserCtxt(ctxt);
13306 return(XML_ERR_NO_MEMORY);
13307 }
13308 xmlAddChild(node, fake);
13309
13310 if (node->type == XML_ELEMENT_NODE) {
13311 nodePush(ctxt, node);
13312 /*
13313 * initialize the SAX2 namespaces stack
13314 */
13315 cur = node;
13316 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13317 xmlNsPtr ns = cur->nsDef;
13318 xmlHashedString hprefix, huri;
13319
13320 while (ns != NULL) {
13321 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
13322 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
13323 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
13324 nsnr++;
13325 ns = ns->next;
13326 }
13327 cur = cur->parent;
13328 }
13329 }
13330
13331 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13332 /*
13333 * ID/IDREF registration will be done in xmlValidateElement below
13334 */
13335 ctxt->loadsubset |= XML_SKIP_IDS;
13336 }
13337
13338 #ifdef LIBXML_HTML_ENABLED
13339 if (doc->type == XML_HTML_DOCUMENT_NODE)
13340 __htmlParseContent(ctxt);
13341 else
13342 #endif
13343 xmlParseContent(ctxt);
13344
13345 xmlParserNsPop(ctxt, nsnr);
13346 if ((RAW == '<') && (NXT(1) == '/')) {
13347 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13348 } else if (RAW != 0) {
13349 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13350 }
13351 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13352 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13353 ctxt->wellFormed = 0;
13354 }
13355
13356 if (!ctxt->wellFormed) {
13357 if (ctxt->errNo == 0)
13358 ret = XML_ERR_INTERNAL_ERROR;
13359 else
13360 ret = (xmlParserErrors)ctxt->errNo;
13361 } else {
13362 ret = XML_ERR_OK;
13363 }
13364
13365 /*
13366 * Return the newly created nodeset after unlinking it from
13367 * the pseudo sibling.
13368 */
13369
13370 cur = fake->next;
13371 fake->next = NULL;
13372 node->last = fake;
13373
13374 if (cur != NULL) {
13375 cur->prev = NULL;
13376 }
13377
13378 *lst = cur;
13379
13380 while (cur != NULL) {
13381 cur->parent = NULL;
13382 cur = cur->next;
13383 }
13384
13385 xmlUnlinkNode(fake);
13386 xmlFreeNode(fake);
13387
13388
13389 if (ret != XML_ERR_OK) {
13390 xmlFreeNodeList(*lst);
13391 *lst = NULL;
13392 }
13393
13394 if (doc->dict != NULL)
13395 ctxt->dict = NULL;
13396 xmlFreeParserCtxt(ctxt);
13397
13398 return(ret);
13399 }
13400
13401 #ifdef LIBXML_SAX1_ENABLED
13402 /**
13403 * xmlParseBalancedChunkMemoryRecover:
13404 * @doc: the document the chunk pertains to (must not be NULL)
13405 * @sax: the SAX handler block (possibly NULL)
13406 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13407 * @depth: Used for loop detection, use 0
13408 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13409 * @lst: the return value for the set of parsed nodes
13410 * @recover: return nodes even if the data is broken (use 0)
13411 *
13412 *
13413 * Parse a well-balanced chunk of an XML document
13414 * called by the parser
13415 * The allowed sequence for the Well Balanced Chunk is the one defined by
13416 * the content production in the XML grammar:
13417 *
13418 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13419 *
13420 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13421 * the parser error code otherwise
13422 *
13423 * In case recover is set to 1, the nodelist will not be empty even if
13424 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13425 * some extent.
13426 */
13427 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13428 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13429 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13430 int recover) {
13431 xmlParserCtxtPtr ctxt;
13432 xmlDocPtr newDoc;
13433 xmlSAXHandlerPtr oldsax = NULL;
13434 xmlNodePtr content, newRoot;
13435 int ret = 0;
13436
13437 if (depth > 40) {
13438 return(XML_ERR_ENTITY_LOOP);
13439 }
13440
13441
13442 if (lst != NULL)
13443 *lst = NULL;
13444 if (string == NULL)
13445 return(-1);
13446
13447 ctxt = xmlCreateDocParserCtxt(string);
13448 if (ctxt == NULL) return(-1);
13449 ctxt->userData = ctxt;
13450 if (sax != NULL) {
13451 oldsax = ctxt->sax;
13452 ctxt->sax = sax;
13453 if (user_data != NULL)
13454 ctxt->userData = user_data;
13455 }
13456 newDoc = xmlNewDoc(BAD_CAST "1.0");
13457 if (newDoc == NULL) {
13458 xmlFreeParserCtxt(ctxt);
13459 return(-1);
13460 }
13461 newDoc->properties = XML_DOC_INTERNAL;
13462 if ((doc != NULL) && (doc->dict != NULL)) {
13463 xmlDictFree(ctxt->dict);
13464 ctxt->dict = doc->dict;
13465 xmlDictReference(ctxt->dict);
13466 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13467 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13468 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13469 ctxt->dictNames = 1;
13470 } else {
13471 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13472 }
13473 /* doc == NULL is only supported for historic reasons */
13474 if (doc != NULL) {
13475 newDoc->intSubset = doc->intSubset;
13476 newDoc->extSubset = doc->extSubset;
13477 }
13478 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13479 if (newRoot == NULL) {
13480 if (sax != NULL)
13481 ctxt->sax = oldsax;
13482 xmlFreeParserCtxt(ctxt);
13483 newDoc->intSubset = NULL;
13484 newDoc->extSubset = NULL;
13485 xmlFreeDoc(newDoc);
13486 return(-1);
13487 }
13488 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13489 nodePush(ctxt, newRoot);
13490 /* doc == NULL is only supported for historic reasons */
13491 if (doc == NULL) {
13492 ctxt->myDoc = newDoc;
13493 } else {
13494 ctxt->myDoc = newDoc;
13495 newDoc->children->doc = doc;
13496 /* Ensure that doc has XML spec namespace */
13497 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13498 newDoc->oldNs = doc->oldNs;
13499 }
13500 ctxt->instate = XML_PARSER_CONTENT;
13501 ctxt->input_id = 2;
13502 ctxt->depth = depth;
13503
13504 /*
13505 * Doing validity checking on chunk doesn't make sense
13506 */
13507 ctxt->validate = 0;
13508 ctxt->loadsubset = 0;
13509 xmlDetectSAX2(ctxt);
13510
13511 if ( doc != NULL ){
13512 content = doc->children;
13513 doc->children = NULL;
13514 xmlParseContent(ctxt);
13515 doc->children = content;
13516 }
13517 else {
13518 xmlParseContent(ctxt);
13519 }
13520 if ((RAW == '<') && (NXT(1) == '/')) {
13521 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13522 } else if (RAW != 0) {
13523 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13524 }
13525 if (ctxt->node != newDoc->children) {
13526 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13527 }
13528
13529 if (!ctxt->wellFormed) {
13530 if (ctxt->errNo == 0)
13531 ret = 1;
13532 else
13533 ret = ctxt->errNo;
13534 } else {
13535 ret = 0;
13536 }
13537
13538 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13539 xmlNodePtr cur;
13540
13541 /*
13542 * Return the newly created nodeset after unlinking it from
13543 * they pseudo parent.
13544 */
13545 cur = newDoc->children->children;
13546 *lst = cur;
13547 while (cur != NULL) {
13548 xmlSetTreeDoc(cur, doc);
13549 cur->parent = NULL;
13550 cur = cur->next;
13551 }
13552 newDoc->children->children = NULL;
13553 }
13554
13555 if (sax != NULL)
13556 ctxt->sax = oldsax;
13557 xmlFreeParserCtxt(ctxt);
13558 newDoc->intSubset = NULL;
13559 newDoc->extSubset = NULL;
13560 /* This leaks the namespace list if doc == NULL */
13561 newDoc->oldNs = NULL;
13562 xmlFreeDoc(newDoc);
13563
13564 return(ret);
13565 }
13566
13567 /**
13568 * xmlSAXParseEntity:
13569 * @sax: the SAX handler block
13570 * @filename: the filename
13571 *
13572 * DEPRECATED: Don't use.
13573 *
13574 * parse an XML external entity out of context and build a tree.
13575 * It use the given SAX function block to handle the parsing callback.
13576 * If sax is NULL, fallback to the default DOM tree building routines.
13577 *
13578 * [78] extParsedEnt ::= TextDecl? content
13579 *
13580 * This correspond to a "Well Balanced" chunk
13581 *
13582 * Returns the resulting document tree
13583 */
13584
13585 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13586 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13587 xmlDocPtr ret;
13588 xmlParserCtxtPtr ctxt;
13589
13590 ctxt = xmlCreateFileParserCtxt(filename);
13591 if (ctxt == NULL) {
13592 return(NULL);
13593 }
13594 if (sax != NULL) {
13595 if (ctxt->sax != NULL)
13596 xmlFree(ctxt->sax);
13597 ctxt->sax = sax;
13598 ctxt->userData = NULL;
13599 }
13600
13601 xmlParseExtParsedEnt(ctxt);
13602
13603 if (ctxt->wellFormed)
13604 ret = ctxt->myDoc;
13605 else {
13606 ret = NULL;
13607 xmlFreeDoc(ctxt->myDoc);
13608 ctxt->myDoc = NULL;
13609 }
13610 if (sax != NULL)
13611 ctxt->sax = NULL;
13612 xmlFreeParserCtxt(ctxt);
13613
13614 return(ret);
13615 }
13616
13617 /**
13618 * xmlParseEntity:
13619 * @filename: the filename
13620 *
13621 * parse an XML external entity out of context and build a tree.
13622 *
13623 * [78] extParsedEnt ::= TextDecl? content
13624 *
13625 * This correspond to a "Well Balanced" chunk
13626 *
13627 * Returns the resulting document tree
13628 */
13629
13630 xmlDocPtr
xmlParseEntity(const char * filename)13631 xmlParseEntity(const char *filename) {
13632 return(xmlSAXParseEntity(NULL, filename));
13633 }
13634 #endif /* LIBXML_SAX1_ENABLED */
13635
13636 /**
13637 * xmlCreateEntityParserCtxtInternal:
13638 * @URL: the entity URL
13639 * @ID: the entity PUBLIC ID
13640 * @base: a possible base for the target URI
13641 * @pctx: parser context used to set options on new context
13642 *
13643 * Create a parser context for an external entity
13644 * Automatic support for ZLIB/Compress compressed document is provided
13645 * by default if found at compile-time.
13646 *
13647 * Returns the new parser context or NULL
13648 */
13649 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax,void * userData,const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13650 xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13651 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13652 xmlParserCtxtPtr pctx) {
13653 xmlParserCtxtPtr ctxt;
13654 xmlParserInputPtr inputStream;
13655 char *directory = NULL;
13656 xmlChar *uri;
13657
13658 ctxt = xmlNewSAXParserCtxt(sax, userData);
13659 if (ctxt == NULL) {
13660 return(NULL);
13661 }
13662
13663 if (pctx != NULL) {
13664 ctxt->options = pctx->options;
13665 ctxt->_private = pctx->_private;
13666 ctxt->input_id = pctx->input_id;
13667 }
13668
13669 /* Don't read from stdin. */
13670 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13671 URL = BAD_CAST "./-";
13672
13673 uri = xmlBuildURI(URL, base);
13674
13675 if (uri == NULL) {
13676 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13677 if (inputStream == NULL) {
13678 xmlFreeParserCtxt(ctxt);
13679 return(NULL);
13680 }
13681
13682 inputPush(ctxt, inputStream);
13683
13684 if ((ctxt->directory == NULL) && (directory == NULL))
13685 directory = xmlParserGetDirectory((char *)URL);
13686 if ((ctxt->directory == NULL) && (directory != NULL))
13687 ctxt->directory = directory;
13688 } else {
13689 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13690 if (inputStream == NULL) {
13691 xmlFree(uri);
13692 xmlFreeParserCtxt(ctxt);
13693 return(NULL);
13694 }
13695
13696 inputPush(ctxt, inputStream);
13697
13698 if ((ctxt->directory == NULL) && (directory == NULL))
13699 directory = xmlParserGetDirectory((char *)uri);
13700 if ((ctxt->directory == NULL) && (directory != NULL))
13701 ctxt->directory = directory;
13702 xmlFree(uri);
13703 }
13704 return(ctxt);
13705 }
13706
13707 /**
13708 * xmlCreateEntityParserCtxt:
13709 * @URL: the entity URL
13710 * @ID: the entity PUBLIC ID
13711 * @base: a possible base for the target URI
13712 *
13713 * Create a parser context for an external entity
13714 * Automatic support for ZLIB/Compress compressed document is provided
13715 * by default if found at compile-time.
13716 *
13717 * Returns the new parser context or NULL
13718 */
13719 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13720 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13721 const xmlChar *base) {
13722 return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13723
13724 }
13725
13726 /************************************************************************
13727 * *
13728 * Front ends when parsing from a file *
13729 * *
13730 ************************************************************************/
13731
13732 /**
13733 * xmlCreateURLParserCtxt:
13734 * @filename: the filename or URL
13735 * @options: a combination of xmlParserOption
13736 *
13737 * Create a parser context for a file or URL content.
13738 * Automatic support for ZLIB/Compress compressed document is provided
13739 * by default if found at compile-time and for file accesses
13740 *
13741 * Returns the new parser context or NULL
13742 */
13743 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13744 xmlCreateURLParserCtxt(const char *filename, int options)
13745 {
13746 xmlParserCtxtPtr ctxt;
13747 xmlParserInputPtr inputStream;
13748 char *directory = NULL;
13749
13750 ctxt = xmlNewParserCtxt();
13751 if (ctxt == NULL) {
13752 xmlErrMemory(NULL, "cannot allocate parser context");
13753 return(NULL);
13754 }
13755
13756 if (options)
13757 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13758 ctxt->linenumbers = 1;
13759
13760 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13761 if (inputStream == NULL) {
13762 xmlFreeParserCtxt(ctxt);
13763 return(NULL);
13764 }
13765
13766 inputPush(ctxt, inputStream);
13767 if ((ctxt->directory == NULL) && (directory == NULL))
13768 directory = xmlParserGetDirectory(filename);
13769 if ((ctxt->directory == NULL) && (directory != NULL))
13770 ctxt->directory = directory;
13771
13772 return(ctxt);
13773 }
13774
13775 /**
13776 * xmlCreateFileParserCtxt:
13777 * @filename: the filename
13778 *
13779 * Create a parser context for a file content.
13780 * Automatic support for ZLIB/Compress compressed document is provided
13781 * by default if found at compile-time.
13782 *
13783 * Returns the new parser context or NULL
13784 */
13785 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13786 xmlCreateFileParserCtxt(const char *filename)
13787 {
13788 return(xmlCreateURLParserCtxt(filename, 0));
13789 }
13790
13791 #ifdef LIBXML_SAX1_ENABLED
13792 /**
13793 * xmlSAXParseFileWithData:
13794 * @sax: the SAX handler block
13795 * @filename: the filename
13796 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13797 * documents
13798 * @data: the userdata
13799 *
13800 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13801 *
13802 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13803 * compressed document is provided by default if found at compile-time.
13804 * It use the given SAX function block to handle the parsing callback.
13805 * If sax is NULL, fallback to the default DOM tree building routines.
13806 *
13807 * User data (void *) is stored within the parser context in the
13808 * context's _private member, so it is available nearly everywhere in libxml
13809 *
13810 * Returns the resulting document tree
13811 */
13812
13813 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13814 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13815 int recovery, void *data) {
13816 xmlDocPtr ret;
13817 xmlParserCtxtPtr ctxt;
13818
13819 xmlInitParser();
13820
13821 ctxt = xmlCreateFileParserCtxt(filename);
13822 if (ctxt == NULL) {
13823 return(NULL);
13824 }
13825 if (sax != NULL) {
13826 if (ctxt->sax != NULL)
13827 xmlFree(ctxt->sax);
13828 ctxt->sax = sax;
13829 }
13830 xmlDetectSAX2(ctxt);
13831 if (data!=NULL) {
13832 ctxt->_private = data;
13833 }
13834
13835 if (ctxt->directory == NULL)
13836 ctxt->directory = xmlParserGetDirectory(filename);
13837
13838 ctxt->recovery = recovery;
13839
13840 xmlParseDocument(ctxt);
13841
13842 if ((ctxt->wellFormed) || recovery) {
13843 ret = ctxt->myDoc;
13844 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13845 if (ctxt->input->buf->compressed > 0)
13846 ret->compression = 9;
13847 else
13848 ret->compression = ctxt->input->buf->compressed;
13849 }
13850 }
13851 else {
13852 ret = NULL;
13853 xmlFreeDoc(ctxt->myDoc);
13854 ctxt->myDoc = NULL;
13855 }
13856 if (sax != NULL)
13857 ctxt->sax = NULL;
13858 xmlFreeParserCtxt(ctxt);
13859
13860 return(ret);
13861 }
13862
13863 /**
13864 * xmlSAXParseFile:
13865 * @sax: the SAX handler block
13866 * @filename: the filename
13867 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13868 * documents
13869 *
13870 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13871 *
13872 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13873 * compressed document is provided by default if found at compile-time.
13874 * It use the given SAX function block to handle the parsing callback.
13875 * If sax is NULL, fallback to the default DOM tree building routines.
13876 *
13877 * Returns the resulting document tree
13878 */
13879
13880 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13881 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13882 int recovery) {
13883 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13884 }
13885
13886 /**
13887 * xmlRecoverDoc:
13888 * @cur: a pointer to an array of xmlChar
13889 *
13890 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13891 *
13892 * parse an XML in-memory document and build a tree.
13893 * In the case the document is not Well Formed, a attempt to build a
13894 * tree is tried anyway
13895 *
13896 * Returns the resulting document tree or NULL in case of failure
13897 */
13898
13899 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13900 xmlRecoverDoc(const xmlChar *cur) {
13901 return(xmlSAXParseDoc(NULL, cur, 1));
13902 }
13903
13904 /**
13905 * xmlParseFile:
13906 * @filename: the filename
13907 *
13908 * DEPRECATED: Use xmlReadFile.
13909 *
13910 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13911 * compressed document is provided by default if found at compile-time.
13912 *
13913 * Returns the resulting document tree if the file was wellformed,
13914 * NULL otherwise.
13915 */
13916
13917 xmlDocPtr
xmlParseFile(const char * filename)13918 xmlParseFile(const char *filename) {
13919 return(xmlSAXParseFile(NULL, filename, 0));
13920 }
13921
13922 /**
13923 * xmlRecoverFile:
13924 * @filename: the filename
13925 *
13926 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13927 *
13928 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13929 * compressed document is provided by default if found at compile-time.
13930 * In the case the document is not Well Formed, it attempts to build
13931 * a tree anyway
13932 *
13933 * Returns the resulting document tree or NULL in case of failure
13934 */
13935
13936 xmlDocPtr
xmlRecoverFile(const char * filename)13937 xmlRecoverFile(const char *filename) {
13938 return(xmlSAXParseFile(NULL, filename, 1));
13939 }
13940
13941
13942 /**
13943 * xmlSetupParserForBuffer:
13944 * @ctxt: an XML parser context
13945 * @buffer: a xmlChar * buffer
13946 * @filename: a file name
13947 *
13948 * DEPRECATED: Don't use.
13949 *
13950 * Setup the parser context to parse a new buffer; Clears any prior
13951 * contents from the parser context. The buffer parameter must not be
13952 * NULL, but the filename parameter can be
13953 */
13954 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13955 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13956 const char* filename)
13957 {
13958 xmlParserInputPtr input;
13959
13960 if ((ctxt == NULL) || (buffer == NULL))
13961 return;
13962
13963 input = xmlNewInputStream(ctxt);
13964 if (input == NULL) {
13965 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13966 xmlClearParserCtxt(ctxt);
13967 return;
13968 }
13969
13970 xmlClearParserCtxt(ctxt);
13971 if (filename != NULL)
13972 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13973 input->base = buffer;
13974 input->cur = buffer;
13975 input->end = &buffer[xmlStrlen(buffer)];
13976 inputPush(ctxt, input);
13977 }
13978
13979 /**
13980 * xmlSAXUserParseFile:
13981 * @sax: a SAX handler
13982 * @user_data: The user data returned on SAX callbacks
13983 * @filename: a file name
13984 *
13985 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13986 *
13987 * parse an XML file and call the given SAX handler routines.
13988 * Automatic support for ZLIB/Compress compressed document is provided
13989 *
13990 * Returns 0 in case of success or a error number otherwise
13991 */
13992 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13993 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13994 const char *filename) {
13995 int ret = 0;
13996 xmlParserCtxtPtr ctxt;
13997
13998 ctxt = xmlCreateFileParserCtxt(filename);
13999 if (ctxt == NULL) return -1;
14000 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14001 xmlFree(ctxt->sax);
14002 ctxt->sax = sax;
14003 xmlDetectSAX2(ctxt);
14004
14005 if (user_data != NULL)
14006 ctxt->userData = user_data;
14007
14008 xmlParseDocument(ctxt);
14009
14010 if (ctxt->wellFormed)
14011 ret = 0;
14012 else {
14013 if (ctxt->errNo != 0)
14014 ret = ctxt->errNo;
14015 else
14016 ret = -1;
14017 }
14018 if (sax != NULL)
14019 ctxt->sax = NULL;
14020 if (ctxt->myDoc != NULL) {
14021 xmlFreeDoc(ctxt->myDoc);
14022 ctxt->myDoc = NULL;
14023 }
14024 xmlFreeParserCtxt(ctxt);
14025
14026 return ret;
14027 }
14028 #endif /* LIBXML_SAX1_ENABLED */
14029
14030 /************************************************************************
14031 * *
14032 * Front ends when parsing from memory *
14033 * *
14034 ************************************************************************/
14035
14036 /**
14037 * xmlCreateMemoryParserCtxt:
14038 * @buffer: a pointer to a char array
14039 * @size: the size of the array
14040 *
14041 * Create a parser context for an XML in-memory document.
14042 *
14043 * Returns the new parser context or NULL
14044 */
14045 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)14046 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14047 xmlParserCtxtPtr ctxt;
14048 xmlParserInputPtr input;
14049 xmlParserInputBufferPtr buf;
14050
14051 if (buffer == NULL)
14052 return(NULL);
14053 if (size <= 0)
14054 return(NULL);
14055
14056 ctxt = xmlNewParserCtxt();
14057 if (ctxt == NULL)
14058 return(NULL);
14059
14060 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14061 if (buf == NULL) {
14062 xmlFreeParserCtxt(ctxt);
14063 return(NULL);
14064 }
14065
14066 input = xmlNewInputStream(ctxt);
14067 if (input == NULL) {
14068 xmlFreeParserInputBuffer(buf);
14069 xmlFreeParserCtxt(ctxt);
14070 return(NULL);
14071 }
14072
14073 input->filename = NULL;
14074 input->buf = buf;
14075 xmlBufResetInput(input->buf->buffer, input);
14076
14077 inputPush(ctxt, input);
14078 return(ctxt);
14079 }
14080
14081 #ifdef LIBXML_SAX1_ENABLED
14082 /**
14083 * xmlSAXParseMemoryWithData:
14084 * @sax: the SAX handler block
14085 * @buffer: an pointer to a char array
14086 * @size: the size of the array
14087 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14088 * documents
14089 * @data: the userdata
14090 *
14091 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14092 *
14093 * parse an XML in-memory block and use the given SAX function block
14094 * to handle the parsing callback. If sax is NULL, fallback to the default
14095 * DOM tree building routines.
14096 *
14097 * User data (void *) is stored within the parser context in the
14098 * context's _private member, so it is available nearly everywhere in libxml
14099 *
14100 * Returns the resulting document tree
14101 */
14102
14103 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)14104 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14105 int size, int recovery, void *data) {
14106 xmlDocPtr ret;
14107 xmlParserCtxtPtr ctxt;
14108
14109 xmlInitParser();
14110
14111 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14112 if (ctxt == NULL) return(NULL);
14113 if (sax != NULL) {
14114 if (ctxt->sax != NULL)
14115 xmlFree(ctxt->sax);
14116 ctxt->sax = sax;
14117 }
14118 xmlDetectSAX2(ctxt);
14119 if (data!=NULL) {
14120 ctxt->_private=data;
14121 }
14122
14123 ctxt->recovery = recovery;
14124
14125 xmlParseDocument(ctxt);
14126
14127 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14128 else {
14129 ret = NULL;
14130 xmlFreeDoc(ctxt->myDoc);
14131 ctxt->myDoc = NULL;
14132 }
14133 if (sax != NULL)
14134 ctxt->sax = NULL;
14135 xmlFreeParserCtxt(ctxt);
14136
14137 return(ret);
14138 }
14139
14140 /**
14141 * xmlSAXParseMemory:
14142 * @sax: the SAX handler block
14143 * @buffer: an pointer to a char array
14144 * @size: the size of the array
14145 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14146 * documents
14147 *
14148 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14149 *
14150 * parse an XML in-memory block and use the given SAX function block
14151 * to handle the parsing callback. If sax is NULL, fallback to the default
14152 * DOM tree building routines.
14153 *
14154 * Returns the resulting document tree
14155 */
14156 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)14157 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14158 int size, int recovery) {
14159 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14160 }
14161
14162 /**
14163 * xmlParseMemory:
14164 * @buffer: an pointer to a char array
14165 * @size: the size of the array
14166 *
14167 * DEPRECATED: Use xmlReadMemory.
14168 *
14169 * parse an XML in-memory block and build a tree.
14170 *
14171 * Returns the resulting document tree
14172 */
14173
xmlParseMemory(const char * buffer,int size)14174 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14175 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14176 }
14177
14178 /**
14179 * xmlRecoverMemory:
14180 * @buffer: an pointer to a char array
14181 * @size: the size of the array
14182 *
14183 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14184 *
14185 * parse an XML in-memory block and build a tree.
14186 * In the case the document is not Well Formed, an attempt to
14187 * build a tree is tried anyway
14188 *
14189 * Returns the resulting document tree or NULL in case of error
14190 */
14191
xmlRecoverMemory(const char * buffer,int size)14192 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14193 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14194 }
14195
14196 /**
14197 * xmlSAXUserParseMemory:
14198 * @sax: a SAX handler
14199 * @user_data: The user data returned on SAX callbacks
14200 * @buffer: an in-memory XML document input
14201 * @size: the length of the XML document in bytes
14202 *
14203 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14204 *
14205 * parse an XML in-memory buffer and call the given SAX handler routines.
14206 *
14207 * Returns 0 in case of success or a error number otherwise
14208 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)14209 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14210 const char *buffer, int size) {
14211 int ret = 0;
14212 xmlParserCtxtPtr ctxt;
14213
14214 xmlInitParser();
14215
14216 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14217 if (ctxt == NULL) return -1;
14218 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14219 xmlFree(ctxt->sax);
14220 ctxt->sax = sax;
14221 xmlDetectSAX2(ctxt);
14222
14223 if (user_data != NULL)
14224 ctxt->userData = user_data;
14225
14226 xmlParseDocument(ctxt);
14227
14228 if (ctxt->wellFormed)
14229 ret = 0;
14230 else {
14231 if (ctxt->errNo != 0)
14232 ret = ctxt->errNo;
14233 else
14234 ret = -1;
14235 }
14236 if (sax != NULL)
14237 ctxt->sax = NULL;
14238 if (ctxt->myDoc != NULL) {
14239 xmlFreeDoc(ctxt->myDoc);
14240 ctxt->myDoc = NULL;
14241 }
14242 xmlFreeParserCtxt(ctxt);
14243
14244 return ret;
14245 }
14246 #endif /* LIBXML_SAX1_ENABLED */
14247
14248 /**
14249 * xmlCreateDocParserCtxt:
14250 * @str: a pointer to an array of xmlChar
14251 *
14252 * Creates a parser context for an XML in-memory document.
14253 *
14254 * Returns the new parser context or NULL
14255 */
14256 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)14257 xmlCreateDocParserCtxt(const xmlChar *str) {
14258 xmlParserCtxtPtr ctxt;
14259 xmlParserInputPtr input;
14260 xmlParserInputBufferPtr buf;
14261
14262 if (str == NULL)
14263 return(NULL);
14264
14265 ctxt = xmlNewParserCtxt();
14266 if (ctxt == NULL)
14267 return(NULL);
14268
14269 buf = xmlParserInputBufferCreateString(str);
14270 if (buf == NULL) {
14271 xmlFreeParserCtxt(ctxt);
14272 return(NULL);
14273 }
14274
14275 input = xmlNewInputStream(ctxt);
14276 if (input == NULL) {
14277 xmlFreeParserInputBuffer(buf);
14278 xmlFreeParserCtxt(ctxt);
14279 return(NULL);
14280 }
14281
14282 input->filename = NULL;
14283 input->buf = buf;
14284 xmlBufResetInput(input->buf->buffer, input);
14285
14286 inputPush(ctxt, input);
14287 return(ctxt);
14288 }
14289
14290 #ifdef LIBXML_SAX1_ENABLED
14291 /**
14292 * xmlSAXParseDoc:
14293 * @sax: the SAX handler block
14294 * @cur: a pointer to an array of xmlChar
14295 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14296 * documents
14297 *
14298 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14299 *
14300 * parse an XML in-memory document and build a tree.
14301 * It use the given SAX function block to handle the parsing callback.
14302 * If sax is NULL, fallback to the default DOM tree building routines.
14303 *
14304 * Returns the resulting document tree
14305 */
14306
14307 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14308 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14309 xmlDocPtr ret;
14310 xmlParserCtxtPtr ctxt;
14311 xmlSAXHandlerPtr oldsax = NULL;
14312
14313 if (cur == NULL) return(NULL);
14314
14315
14316 ctxt = xmlCreateDocParserCtxt(cur);
14317 if (ctxt == NULL) return(NULL);
14318 if (sax != NULL) {
14319 oldsax = ctxt->sax;
14320 ctxt->sax = sax;
14321 ctxt->userData = NULL;
14322 }
14323 xmlDetectSAX2(ctxt);
14324
14325 xmlParseDocument(ctxt);
14326 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14327 else {
14328 ret = NULL;
14329 xmlFreeDoc(ctxt->myDoc);
14330 ctxt->myDoc = NULL;
14331 }
14332 if (sax != NULL)
14333 ctxt->sax = oldsax;
14334 xmlFreeParserCtxt(ctxt);
14335
14336 return(ret);
14337 }
14338
14339 /**
14340 * xmlParseDoc:
14341 * @cur: a pointer to an array of xmlChar
14342 *
14343 * DEPRECATED: Use xmlReadDoc.
14344 *
14345 * parse an XML in-memory document and build a tree.
14346 *
14347 * Returns the resulting document tree
14348 */
14349
14350 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14351 xmlParseDoc(const xmlChar *cur) {
14352 return(xmlSAXParseDoc(NULL, cur, 0));
14353 }
14354 #endif /* LIBXML_SAX1_ENABLED */
14355
14356 #ifdef LIBXML_LEGACY_ENABLED
14357 /************************************************************************
14358 * *
14359 * Specific function to keep track of entities references *
14360 * and used by the XSLT debugger *
14361 * *
14362 ************************************************************************/
14363
14364 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14365
14366 /**
14367 * xmlAddEntityReference:
14368 * @ent : A valid entity
14369 * @firstNode : A valid first node for children of entity
14370 * @lastNode : A valid last node of children entity
14371 *
14372 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14373 */
14374 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14375 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14376 xmlNodePtr lastNode)
14377 {
14378 if (xmlEntityRefFunc != NULL) {
14379 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14380 }
14381 }
14382
14383
14384 /**
14385 * xmlSetEntityReferenceFunc:
14386 * @func: A valid function
14387 *
14388 * Set the function to call call back when a xml reference has been made
14389 */
14390 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14391 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14392 {
14393 xmlEntityRefFunc = func;
14394 }
14395 #endif /* LIBXML_LEGACY_ENABLED */
14396
14397 /************************************************************************
14398 * *
14399 * New set (2.6.0) of simpler and more flexible APIs *
14400 * *
14401 ************************************************************************/
14402
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored. A variable named "dict" must
 * be visible where the macro is used; @str is evaluated more than once.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement: it requires a terminating semicolon and can
 * be used safely as the body of an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14414
14415 /**
14416 * xmlCtxtReset:
14417 * @ctxt: an XML parser context
14418 *
14419 * Reset a parser context
14420 */
14421 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14422 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14423 {
14424 xmlParserInputPtr input;
14425 xmlDictPtr dict;
14426
14427 if (ctxt == NULL)
14428 return;
14429
14430 dict = ctxt->dict;
14431
14432 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14433 xmlFreeInputStream(input);
14434 }
14435 ctxt->inputNr = 0;
14436 ctxt->input = NULL;
14437
14438 ctxt->spaceNr = 0;
14439 if (ctxt->spaceTab != NULL) {
14440 ctxt->spaceTab[0] = -1;
14441 ctxt->space = &ctxt->spaceTab[0];
14442 } else {
14443 ctxt->space = NULL;
14444 }
14445
14446
14447 ctxt->nodeNr = 0;
14448 ctxt->node = NULL;
14449
14450 ctxt->nameNr = 0;
14451 ctxt->name = NULL;
14452
14453 ctxt->nsNr = 0;
14454 xmlParserNsReset(ctxt->nsdb);
14455
14456 DICT_FREE(ctxt->version);
14457 ctxt->version = NULL;
14458 DICT_FREE(ctxt->encoding);
14459 ctxt->encoding = NULL;
14460 DICT_FREE(ctxt->directory);
14461 ctxt->directory = NULL;
14462 DICT_FREE(ctxt->extSubURI);
14463 ctxt->extSubURI = NULL;
14464 DICT_FREE(ctxt->extSubSystem);
14465 ctxt->extSubSystem = NULL;
14466 if (ctxt->myDoc != NULL)
14467 xmlFreeDoc(ctxt->myDoc);
14468 ctxt->myDoc = NULL;
14469
14470 ctxt->standalone = -1;
14471 ctxt->hasExternalSubset = 0;
14472 ctxt->hasPErefs = 0;
14473 ctxt->html = 0;
14474 ctxt->external = 0;
14475 ctxt->instate = XML_PARSER_START;
14476 ctxt->token = 0;
14477
14478 ctxt->wellFormed = 1;
14479 ctxt->nsWellFormed = 1;
14480 ctxt->disableSAX = 0;
14481 ctxt->valid = 1;
14482 #if 0
14483 ctxt->vctxt.userData = ctxt;
14484 ctxt->vctxt.error = xmlParserValidityError;
14485 ctxt->vctxt.warning = xmlParserValidityWarning;
14486 #endif
14487 ctxt->record_info = 0;
14488 ctxt->checkIndex = 0;
14489 ctxt->endCheckState = 0;
14490 ctxt->inSubset = 0;
14491 ctxt->errNo = XML_ERR_OK;
14492 ctxt->depth = 0;
14493 ctxt->catalogs = NULL;
14494 ctxt->sizeentities = 0;
14495 ctxt->sizeentcopy = 0;
14496 xmlInitNodeInfoSeq(&ctxt->node_seq);
14497
14498 if (ctxt->attsDefault != NULL) {
14499 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14500 ctxt->attsDefault = NULL;
14501 }
14502 if (ctxt->attsSpecial != NULL) {
14503 xmlHashFree(ctxt->attsSpecial, NULL);
14504 ctxt->attsSpecial = NULL;
14505 }
14506
14507 #ifdef LIBXML_CATALOG_ENABLED
14508 if (ctxt->catalogs != NULL)
14509 xmlCatalogFreeLocal(ctxt->catalogs);
14510 #endif
14511 ctxt->nbErrors = 0;
14512 ctxt->nbWarnings = 0;
14513 if (ctxt->lastError.code != XML_ERR_OK)
14514 xmlResetError(&ctxt->lastError);
14515 }
14516
14517 /**
14518 * xmlCtxtResetPush:
14519 * @ctxt: an XML parser context
14520 * @chunk: a pointer to an array of chars
14521 * @size: number of chars in the array
14522 * @filename: an optional file name or URI
14523 * @encoding: the document encoding, or NULL
14524 *
14525 * Reset a push parser context
14526 *
14527 * Returns 0 in case of success and 1 in case of error
14528 */
14529 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14530 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14531 int size, const char *filename, const char *encoding)
14532 {
14533 xmlParserInputPtr inputStream;
14534 xmlParserInputBufferPtr buf;
14535
14536 if (ctxt == NULL)
14537 return(1);
14538
14539 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
14540 if (buf == NULL)
14541 return(1);
14542
14543 if (ctxt == NULL) {
14544 xmlFreeParserInputBuffer(buf);
14545 return(1);
14546 }
14547
14548 xmlCtxtReset(ctxt);
14549
14550 if (filename == NULL) {
14551 ctxt->directory = NULL;
14552 } else {
14553 ctxt->directory = xmlParserGetDirectory(filename);
14554 }
14555
14556 inputStream = xmlNewInputStream(ctxt);
14557 if (inputStream == NULL) {
14558 xmlFreeParserInputBuffer(buf);
14559 return(1);
14560 }
14561
14562 if (filename == NULL)
14563 inputStream->filename = NULL;
14564 else
14565 inputStream->filename = (char *)
14566 xmlCanonicPath((const xmlChar *) filename);
14567 inputStream->buf = buf;
14568 xmlBufResetInput(buf->buffer, inputStream);
14569
14570 inputPush(ctxt, inputStream);
14571
14572 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14573 (ctxt->input->buf != NULL)) {
14574 size_t pos = ctxt->input->cur - ctxt->input->base;
14575 int res;
14576
14577 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14578 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
14579 if (res < 0) {
14580 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14581 xmlHaltParser(ctxt);
14582 return(1);
14583 }
14584 }
14585
14586 if (encoding != NULL) {
14587 xmlCharEncodingHandlerPtr hdlr;
14588
14589 hdlr = xmlFindCharEncodingHandler(encoding);
14590 if (hdlr != NULL) {
14591 xmlSwitchToEncoding(ctxt, hdlr);
14592 } else {
14593 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14594 "Unsupported encoding %s\n", BAD_CAST encoding);
14595 }
14596 }
14597
14598 return(0);
14599 }
14600
14601
14602 /**
14603 * xmlCtxtUseOptionsInternal:
14604 * @ctxt: an XML parser context
14605 * @options: a combination of xmlParserOption
14606 * @encoding: the user provided encoding to use
14607 *
14608 * Applies the options to the parser context
14609 *
14610 * Returns 0 in case of success, the set of unknown or unimplemented options
14611 * in case of error.
14612 */
14613 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14614 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14615 {
14616 if (ctxt == NULL)
14617 return(-1);
14618 if (encoding != NULL) {
14619 if (ctxt->encoding != NULL)
14620 xmlFree((xmlChar *) ctxt->encoding);
14621 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14622 }
14623 if (options & XML_PARSE_RECOVER) {
14624 ctxt->recovery = 1;
14625 options -= XML_PARSE_RECOVER;
14626 ctxt->options |= XML_PARSE_RECOVER;
14627 } else
14628 ctxt->recovery = 0;
14629 if (options & XML_PARSE_DTDLOAD) {
14630 ctxt->loadsubset = XML_DETECT_IDS;
14631 options -= XML_PARSE_DTDLOAD;
14632 ctxt->options |= XML_PARSE_DTDLOAD;
14633 } else
14634 ctxt->loadsubset = 0;
14635 if (options & XML_PARSE_DTDATTR) {
14636 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14637 options -= XML_PARSE_DTDATTR;
14638 ctxt->options |= XML_PARSE_DTDATTR;
14639 }
14640 if (options & XML_PARSE_NOENT) {
14641 ctxt->replaceEntities = 1;
14642 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14643 options -= XML_PARSE_NOENT;
14644 ctxt->options |= XML_PARSE_NOENT;
14645 } else
14646 ctxt->replaceEntities = 0;
14647 if (options & XML_PARSE_PEDANTIC) {
14648 ctxt->pedantic = 1;
14649 options -= XML_PARSE_PEDANTIC;
14650 ctxt->options |= XML_PARSE_PEDANTIC;
14651 } else
14652 ctxt->pedantic = 0;
14653 if (options & XML_PARSE_NOBLANKS) {
14654 ctxt->keepBlanks = 0;
14655 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14656 options -= XML_PARSE_NOBLANKS;
14657 ctxt->options |= XML_PARSE_NOBLANKS;
14658 } else
14659 ctxt->keepBlanks = 1;
14660 if (options & XML_PARSE_DTDVALID) {
14661 ctxt->validate = 1;
14662 if (options & XML_PARSE_NOWARNING)
14663 ctxt->vctxt.warning = NULL;
14664 if (options & XML_PARSE_NOERROR)
14665 ctxt->vctxt.error = NULL;
14666 options -= XML_PARSE_DTDVALID;
14667 ctxt->options |= XML_PARSE_DTDVALID;
14668 } else
14669 ctxt->validate = 0;
14670 if (options & XML_PARSE_NOWARNING) {
14671 ctxt->sax->warning = NULL;
14672 options -= XML_PARSE_NOWARNING;
14673 }
14674 if (options & XML_PARSE_NOERROR) {
14675 ctxt->sax->error = NULL;
14676 ctxt->sax->fatalError = NULL;
14677 options -= XML_PARSE_NOERROR;
14678 }
14679 #ifdef LIBXML_SAX1_ENABLED
14680 if (options & XML_PARSE_SAX1) {
14681 ctxt->sax->startElementNs = NULL;
14682 ctxt->sax->endElementNs = NULL;
14683 ctxt->sax->initialized = 1;
14684 options -= XML_PARSE_SAX1;
14685 ctxt->options |= XML_PARSE_SAX1;
14686 }
14687 #endif /* LIBXML_SAX1_ENABLED */
14688 if (options & XML_PARSE_NODICT) {
14689 ctxt->dictNames = 0;
14690 options -= XML_PARSE_NODICT;
14691 ctxt->options |= XML_PARSE_NODICT;
14692 } else {
14693 ctxt->dictNames = 1;
14694 }
14695 if (options & XML_PARSE_NOCDATA) {
14696 ctxt->sax->cdataBlock = NULL;
14697 options -= XML_PARSE_NOCDATA;
14698 ctxt->options |= XML_PARSE_NOCDATA;
14699 }
14700 if (options & XML_PARSE_NSCLEAN) {
14701 ctxt->options |= XML_PARSE_NSCLEAN;
14702 options -= XML_PARSE_NSCLEAN;
14703 }
14704 if (options & XML_PARSE_NONET) {
14705 ctxt->options |= XML_PARSE_NONET;
14706 options -= XML_PARSE_NONET;
14707 }
14708 if (options & XML_PARSE_COMPACT) {
14709 ctxt->options |= XML_PARSE_COMPACT;
14710 options -= XML_PARSE_COMPACT;
14711 }
14712 if (options & XML_PARSE_OLD10) {
14713 ctxt->options |= XML_PARSE_OLD10;
14714 options -= XML_PARSE_OLD10;
14715 }
14716 if (options & XML_PARSE_NOBASEFIX) {
14717 ctxt->options |= XML_PARSE_NOBASEFIX;
14718 options -= XML_PARSE_NOBASEFIX;
14719 }
14720 if (options & XML_PARSE_HUGE) {
14721 ctxt->options |= XML_PARSE_HUGE;
14722 options -= XML_PARSE_HUGE;
14723 if (ctxt->dict != NULL)
14724 xmlDictSetLimit(ctxt->dict, 0);
14725 }
14726 if (options & XML_PARSE_OLDSAX) {
14727 ctxt->options |= XML_PARSE_OLDSAX;
14728 options -= XML_PARSE_OLDSAX;
14729 }
14730 if (options & XML_PARSE_IGNORE_ENC) {
14731 ctxt->options |= XML_PARSE_IGNORE_ENC;
14732 options -= XML_PARSE_IGNORE_ENC;
14733 }
14734 if (options & XML_PARSE_BIG_LINES) {
14735 ctxt->options |= XML_PARSE_BIG_LINES;
14736 options -= XML_PARSE_BIG_LINES;
14737 }
14738 ctxt->linenumbers = 1;
14739 return (options);
14740 }
14741
14742 /**
14743 * xmlCtxtUseOptions:
14744 * @ctxt: an XML parser context
14745 * @options: a combination of xmlParserOption
14746 *
14747 * Applies the options to the parser context
14748 *
14749 * Returns 0 in case of success, the set of unknown or unimplemented options
14750 * in case of error.
14751 */
14752 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14753 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14754 {
14755 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14756 }
14757
14758 /**
14759 * xmlCtxtSetMaxAmplification:
14760 * @ctxt: an XML parser context
14761 * @maxAmpl: maximum amplification factor
14762 *
14763 * To protect against exponential entity expansion ("billion laughs"), the
14764 * size of serialized output is (roughly) limited to the input size
14765 * multiplied by this factor. The default value is 5.
14766 *
14767 * When working with documents making heavy use of entity expansion, it can
14768 * be necessary to increase the value. For security reasons, this should only
14769 * be considered when processing trusted input.
14770 */
14771 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)14772 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
14773 {
14774 ctxt->maxAmpl = maxAmpl;
14775 }
14776
14777 /**
14778 * xmlDoRead:
14779 * @ctxt: an XML parser context
14780 * @URL: the base URL to use for the document
14781 * @encoding: the document encoding, or NULL
14782 * @options: a combination of xmlParserOption
14783 * @reuse: keep the context for reuse
14784 *
14785 * Common front-end for the xmlRead functions
14786 *
14787 * Returns the resulting document tree or NULL
14788 */
14789 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14790 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14791 int options, int reuse)
14792 {
14793 xmlDocPtr ret;
14794
14795 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14796 if (encoding != NULL) {
14797 xmlCharEncodingHandlerPtr hdlr;
14798
14799 /*
14800 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14801 * caller provided an encoding. Otherwise, we might switch to
14802 * the encoding from the XML declaration which is likely to
14803 * break things. Also see xmlSwitchInputEncoding.
14804 */
14805 hdlr = xmlFindCharEncodingHandler(encoding);
14806 if (hdlr != NULL)
14807 xmlSwitchToEncoding(ctxt, hdlr);
14808 }
14809 if ((URL != NULL) && (ctxt->input != NULL) &&
14810 (ctxt->input->filename == NULL))
14811 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14812 xmlParseDocument(ctxt);
14813 if ((ctxt->wellFormed) || ctxt->recovery)
14814 ret = ctxt->myDoc;
14815 else {
14816 ret = NULL;
14817 if (ctxt->myDoc != NULL) {
14818 xmlFreeDoc(ctxt->myDoc);
14819 }
14820 }
14821 ctxt->myDoc = NULL;
14822 if (!reuse) {
14823 xmlFreeParserCtxt(ctxt);
14824 }
14825
14826 return (ret);
14827 }
14828
14829 /**
14830 * xmlReadDoc:
14831 * @cur: a pointer to a zero terminated string
14832 * @URL: the base URL to use for the document
14833 * @encoding: the document encoding, or NULL
14834 * @options: a combination of xmlParserOption
14835 *
14836 * parse an XML in-memory document and build a tree.
14837 *
14838 * Returns the resulting document tree
14839 */
14840 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14841 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14842 {
14843 xmlParserCtxtPtr ctxt;
14844
14845 if (cur == NULL)
14846 return (NULL);
14847 xmlInitParser();
14848
14849 ctxt = xmlCreateDocParserCtxt(cur);
14850 if (ctxt == NULL)
14851 return (NULL);
14852 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14853 }
14854
14855 /**
14856 * xmlReadFile:
14857 * @filename: a file or URL
14858 * @encoding: the document encoding, or NULL
14859 * @options: a combination of xmlParserOption
14860 *
14861 * parse an XML file from the filesystem or the network.
14862 *
14863 * Returns the resulting document tree
14864 */
14865 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14866 xmlReadFile(const char *filename, const char *encoding, int options)
14867 {
14868 xmlParserCtxtPtr ctxt;
14869
14870 xmlInitParser();
14871 ctxt = xmlCreateURLParserCtxt(filename, options);
14872 if (ctxt == NULL)
14873 return (NULL);
14874 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14875 }
14876
14877 /**
14878 * xmlReadMemory:
14879 * @buffer: a pointer to a char array
14880 * @size: the size of the array
14881 * @URL: the base URL to use for the document
14882 * @encoding: the document encoding, or NULL
14883 * @options: a combination of xmlParserOption
14884 *
14885 * parse an XML in-memory document and build a tree.
14886 *
14887 * Returns the resulting document tree
14888 */
14889 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14890 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14891 {
14892 xmlParserCtxtPtr ctxt;
14893
14894 xmlInitParser();
14895 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14896 if (ctxt == NULL)
14897 return (NULL);
14898 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14899 }
14900
14901 /**
14902 * xmlReadFd:
14903 * @fd: an open file descriptor
14904 * @URL: the base URL to use for the document
14905 * @encoding: the document encoding, or NULL
14906 * @options: a combination of xmlParserOption
14907 *
14908 * parse an XML from a file descriptor and build a tree.
14909 * NOTE that the file descriptor will not be closed when the
14910 * reader is closed or reset.
14911 *
14912 * Returns the resulting document tree
14913 */
14914 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14915 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14916 {
14917 xmlParserCtxtPtr ctxt;
14918 xmlParserInputBufferPtr input;
14919 xmlParserInputPtr stream;
14920
14921 if (fd < 0)
14922 return (NULL);
14923 xmlInitParser();
14924
14925 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14926 if (input == NULL)
14927 return (NULL);
14928 input->closecallback = NULL;
14929 ctxt = xmlNewParserCtxt();
14930 if (ctxt == NULL) {
14931 xmlFreeParserInputBuffer(input);
14932 return (NULL);
14933 }
14934 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14935 if (stream == NULL) {
14936 xmlFreeParserInputBuffer(input);
14937 xmlFreeParserCtxt(ctxt);
14938 return (NULL);
14939 }
14940 inputPush(ctxt, stream);
14941 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14942 }
14943
14944 /**
14945 * xmlReadIO:
14946 * @ioread: an I/O read function
14947 * @ioclose: an I/O close function
14948 * @ioctx: an I/O handler
14949 * @URL: the base URL to use for the document
14950 * @encoding: the document encoding, or NULL
14951 * @options: a combination of xmlParserOption
14952 *
14953 * parse an XML document from I/O functions and source and build a tree.
14954 *
14955 * Returns the resulting document tree
14956 */
14957 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14958 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14959 void *ioctx, const char *URL, const char *encoding, int options)
14960 {
14961 xmlParserCtxtPtr ctxt;
14962 xmlParserInputBufferPtr input;
14963 xmlParserInputPtr stream;
14964
14965 if (ioread == NULL)
14966 return (NULL);
14967 xmlInitParser();
14968
14969 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14970 XML_CHAR_ENCODING_NONE);
14971 if (input == NULL) {
14972 if (ioclose != NULL)
14973 ioclose(ioctx);
14974 return (NULL);
14975 }
14976 ctxt = xmlNewParserCtxt();
14977 if (ctxt == NULL) {
14978 xmlFreeParserInputBuffer(input);
14979 return (NULL);
14980 }
14981 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14982 if (stream == NULL) {
14983 xmlFreeParserInputBuffer(input);
14984 xmlFreeParserCtxt(ctxt);
14985 return (NULL);
14986 }
14987 inputPush(ctxt, stream);
14988 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14989 }
14990
14991 /**
14992 * xmlCtxtReadDoc:
14993 * @ctxt: an XML parser context
14994 * @str: a pointer to a zero terminated string
14995 * @URL: the base URL to use for the document
14996 * @encoding: the document encoding, or NULL
14997 * @options: a combination of xmlParserOption
14998 *
14999 * parse an XML in-memory document and build a tree.
15000 * This reuses the existing @ctxt parser context
15001 *
15002 * Returns the resulting document tree
15003 */
15004 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)15005 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
15006 const char *URL, const char *encoding, int options)
15007 {
15008 xmlParserInputBufferPtr input;
15009 xmlParserInputPtr stream;
15010
15011 if (ctxt == NULL)
15012 return (NULL);
15013 if (str == NULL)
15014 return (NULL);
15015 xmlInitParser();
15016
15017 xmlCtxtReset(ctxt);
15018
15019 input = xmlParserInputBufferCreateString(str);
15020 if (input == NULL) {
15021 return(NULL);
15022 }
15023
15024 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15025 if (stream == NULL) {
15026 xmlFreeParserInputBuffer(input);
15027 return(NULL);
15028 }
15029
15030 inputPush(ctxt, stream);
15031 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15032 }
15033
15034 /**
15035 * xmlCtxtReadFile:
15036 * @ctxt: an XML parser context
15037 * @filename: a file or URL
15038 * @encoding: the document encoding, or NULL
15039 * @options: a combination of xmlParserOption
15040 *
15041 * parse an XML file from the filesystem or the network.
15042 * This reuses the existing @ctxt parser context
15043 *
15044 * Returns the resulting document tree
15045 */
15046 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)15047 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15048 const char *encoding, int options)
15049 {
15050 xmlParserInputPtr stream;
15051
15052 if (filename == NULL)
15053 return (NULL);
15054 if (ctxt == NULL)
15055 return (NULL);
15056 xmlInitParser();
15057
15058 xmlCtxtReset(ctxt);
15059
15060 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15061 if (stream == NULL) {
15062 return (NULL);
15063 }
15064 inputPush(ctxt, stream);
15065 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15066 }
15067
15068 /**
15069 * xmlCtxtReadMemory:
15070 * @ctxt: an XML parser context
15071 * @buffer: a pointer to a char array
15072 * @size: the size of the array
15073 * @URL: the base URL to use for the document
15074 * @encoding: the document encoding, or NULL
15075 * @options: a combination of xmlParserOption
15076 *
15077 * parse an XML in-memory document and build a tree.
15078 * This reuses the existing @ctxt parser context
15079 *
15080 * Returns the resulting document tree
15081 */
15082 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)15083 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15084 const char *URL, const char *encoding, int options)
15085 {
15086 xmlParserInputBufferPtr input;
15087 xmlParserInputPtr stream;
15088
15089 if (ctxt == NULL)
15090 return (NULL);
15091 if (buffer == NULL)
15092 return (NULL);
15093 xmlInitParser();
15094
15095 xmlCtxtReset(ctxt);
15096
15097 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15098 if (input == NULL) {
15099 return(NULL);
15100 }
15101
15102 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15103 if (stream == NULL) {
15104 xmlFreeParserInputBuffer(input);
15105 return(NULL);
15106 }
15107
15108 inputPush(ctxt, stream);
15109 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15110 }
15111
15112 /**
15113 * xmlCtxtReadFd:
15114 * @ctxt: an XML parser context
15115 * @fd: an open file descriptor
15116 * @URL: the base URL to use for the document
15117 * @encoding: the document encoding, or NULL
15118 * @options: a combination of xmlParserOption
15119 *
15120 * parse an XML from a file descriptor and build a tree.
15121 * This reuses the existing @ctxt parser context
15122 * NOTE that the file descriptor will not be closed when the
15123 * reader is closed or reset.
15124 *
15125 * Returns the resulting document tree
15126 */
15127 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)15128 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15129 const char *URL, const char *encoding, int options)
15130 {
15131 xmlParserInputBufferPtr input;
15132 xmlParserInputPtr stream;
15133
15134 if (fd < 0)
15135 return (NULL);
15136 if (ctxt == NULL)
15137 return (NULL);
15138 xmlInitParser();
15139
15140 xmlCtxtReset(ctxt);
15141
15142
15143 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15144 if (input == NULL)
15145 return (NULL);
15146 input->closecallback = NULL;
15147 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15148 if (stream == NULL) {
15149 xmlFreeParserInputBuffer(input);
15150 return (NULL);
15151 }
15152 inputPush(ctxt, stream);
15153 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15154 }
15155
15156 /**
15157 * xmlCtxtReadIO:
15158 * @ctxt: an XML parser context
15159 * @ioread: an I/O read function
15160 * @ioclose: an I/O close function
15161 * @ioctx: an I/O handler
15162 * @URL: the base URL to use for the document
15163 * @encoding: the document encoding, or NULL
15164 * @options: a combination of xmlParserOption
15165 *
15166 * parse an XML document from I/O functions and source and build a tree.
15167 * This reuses the existing @ctxt parser context
15168 *
15169 * Returns the resulting document tree
15170 */
15171 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)15172 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15173 xmlInputCloseCallback ioclose, void *ioctx,
15174 const char *URL,
15175 const char *encoding, int options)
15176 {
15177 xmlParserInputBufferPtr input;
15178 xmlParserInputPtr stream;
15179
15180 if (ioread == NULL)
15181 return (NULL);
15182 if (ctxt == NULL)
15183 return (NULL);
15184 xmlInitParser();
15185
15186 xmlCtxtReset(ctxt);
15187
15188 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15189 XML_CHAR_ENCODING_NONE);
15190 if (input == NULL) {
15191 if (ioclose != NULL)
15192 ioclose(ioctx);
15193 return (NULL);
15194 }
15195 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15196 if (stream == NULL) {
15197 xmlFreeParserInputBuffer(input);
15198 return (NULL);
15199 }
15200 inputPush(ctxt, stream);
15201 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15202 }
15203
15204