/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82
83 static void
84 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86 /************************************************************************
87 * *
88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
89 * *
90 ************************************************************************/
91
/*
 * Entity replacement sizes below XML_PARSER_BIG_ENTITY are never considered
 * suspicious by xmlParserEntityCheck().
 */
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
102
103 /*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature. It can be disabled with the XML_PARSE_HUGE
110 * parser option.
111 */
112 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)113 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114 xmlEntityPtr ent)
115 {
116 unsigned long consumed = 0;
117
118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->checked;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171 }
172
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to process.
 * This is not a limitation of the parser but a safety boundary feature.
 * It can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
182
183
184
/* Miscellaneous parser-wide constants. */
#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"
189
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
198
199
200 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
201 xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202 const xmlChar **str);
203
204 static xmlParserErrors
205 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 xmlSAXHandlerPtr sax,
207 void *user_data, int depth, const xmlChar *URL,
208 const xmlChar *ID, xmlNodePtr *list);
209
210 static int
211 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212 const char *encoding);
213 #ifdef LIBXML_LEGACY_ENABLED
214 static void
215 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216 xmlNodePtr lastNode);
217 #endif /* LIBXML_LEGACY_ENABLED */
218
219 static xmlParserErrors
220 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 const xmlChar *string, void *user_data, xmlNodePtr *lst);
222
223 static int
224 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
226 /************************************************************************
227 * *
228 * Some factorized error routines *
229 * *
230 ************************************************************************/
231
232 /**
233 * xmlErrAttributeDup:
234 * @ctxt: an XML parser context
235 * @prefix: the attribute prefix
236 * @localname: the attribute localname
237 *
238 * Handle a redefinition of attribute error
239 */
240 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)241 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242 const xmlChar * localname)
243 {
244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245 (ctxt->instate == XML_PARSER_EOF))
246 return;
247 if (ctxt != NULL)
248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
249 if (prefix == NULL)
250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
251 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
254 else
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
265 }
266
267 /**
268 * xmlFatalErr:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @extra: extra information string
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)276 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
277 {
278 const char *errmsg;
279
280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281 (ctxt->instate == XML_PARSER_EOF))
282 return;
283 switch (error) {
284 case XML_ERR_INVALID_HEX_CHARREF:
285 errmsg = "CharRef: invalid hexadecimal value\n";
286 break;
287 case XML_ERR_INVALID_DEC_CHARREF:
288 errmsg = "CharRef: invalid decimal value\n";
289 break;
290 case XML_ERR_INVALID_CHARREF:
291 errmsg = "CharRef: invalid value\n";
292 break;
293 case XML_ERR_INTERNAL_ERROR:
294 errmsg = "internal error";
295 break;
296 case XML_ERR_PEREF_AT_EOF:
297 errmsg = "PEReference at end of document\n";
298 break;
299 case XML_ERR_PEREF_IN_PROLOG:
300 errmsg = "PEReference in prolog\n";
301 break;
302 case XML_ERR_PEREF_IN_EPILOG:
303 errmsg = "PEReference in epilog\n";
304 break;
305 case XML_ERR_PEREF_NO_NAME:
306 errmsg = "PEReference: no name\n";
307 break;
308 case XML_ERR_PEREF_SEMICOL_MISSING:
309 errmsg = "PEReference: expecting ';'\n";
310 break;
311 case XML_ERR_ENTITY_LOOP:
312 errmsg = "Detected an entity reference loop\n";
313 break;
314 case XML_ERR_ENTITY_NOT_STARTED:
315 errmsg = "EntityValue: \" or ' expected\n";
316 break;
317 case XML_ERR_ENTITY_PE_INTERNAL:
318 errmsg = "PEReferences forbidden in internal subset\n";
319 break;
320 case XML_ERR_ENTITY_NOT_FINISHED:
321 errmsg = "EntityValue: \" or ' expected\n";
322 break;
323 case XML_ERR_ATTRIBUTE_NOT_STARTED:
324 errmsg = "AttValue: \" or ' expected\n";
325 break;
326 case XML_ERR_LT_IN_ATTRIBUTE:
327 errmsg = "Unescaped '<' not allowed in attributes values\n";
328 break;
329 case XML_ERR_LITERAL_NOT_STARTED:
330 errmsg = "SystemLiteral \" or ' expected\n";
331 break;
332 case XML_ERR_LITERAL_NOT_FINISHED:
333 errmsg = "Unfinished System or Public ID \" or ' expected\n";
334 break;
335 case XML_ERR_MISPLACED_CDATA_END:
336 errmsg = "Sequence ']]>' not allowed in content\n";
337 break;
338 case XML_ERR_URI_REQUIRED:
339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340 break;
341 case XML_ERR_PUBID_REQUIRED:
342 errmsg = "PUBLIC, the Public Identifier is missing\n";
343 break;
344 case XML_ERR_HYPHEN_IN_COMMENT:
345 errmsg = "Comment must not contain '--' (double-hyphen)\n";
346 break;
347 case XML_ERR_PI_NOT_STARTED:
348 errmsg = "xmlParsePI : no target name\n";
349 break;
350 case XML_ERR_RESERVED_XML_NAME:
351 errmsg = "Invalid PI name\n";
352 break;
353 case XML_ERR_NOTATION_NOT_STARTED:
354 errmsg = "NOTATION: Name expected here\n";
355 break;
356 case XML_ERR_NOTATION_NOT_FINISHED:
357 errmsg = "'>' required to close NOTATION declaration\n";
358 break;
359 case XML_ERR_VALUE_REQUIRED:
360 errmsg = "Entity value required\n";
361 break;
362 case XML_ERR_URI_FRAGMENT:
363 errmsg = "Fragment not allowed";
364 break;
365 case XML_ERR_ATTLIST_NOT_STARTED:
366 errmsg = "'(' required to start ATTLIST enumeration\n";
367 break;
368 case XML_ERR_NMTOKEN_REQUIRED:
369 errmsg = "NmToken expected in ATTLIST enumeration\n";
370 break;
371 case XML_ERR_ATTLIST_NOT_FINISHED:
372 errmsg = "')' required to finish ATTLIST enumeration\n";
373 break;
374 case XML_ERR_MIXED_NOT_STARTED:
375 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376 break;
377 case XML_ERR_PCDATA_REQUIRED:
378 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379 break;
380 case XML_ERR_ELEMCONTENT_NOT_STARTED:
381 errmsg = "ContentDecl : Name or '(' expected\n";
382 break;
383 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
384 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385 break;
386 case XML_ERR_PEREF_IN_INT_SUBSET:
387 errmsg =
388 "PEReference: forbidden within markup decl in internal subset\n";
389 break;
390 case XML_ERR_GT_REQUIRED:
391 errmsg = "expected '>'\n";
392 break;
393 case XML_ERR_CONDSEC_INVALID:
394 errmsg = "XML conditional section '[' expected\n";
395 break;
396 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
397 errmsg = "Content error in the external subset\n";
398 break;
399 case XML_ERR_CONDSEC_INVALID_KEYWORD:
400 errmsg =
401 "conditional section INCLUDE or IGNORE keyword expected\n";
402 break;
403 case XML_ERR_CONDSEC_NOT_FINISHED:
404 errmsg = "XML conditional section not closed\n";
405 break;
406 case XML_ERR_XMLDECL_NOT_STARTED:
407 errmsg = "Text declaration '<?xml' required\n";
408 break;
409 case XML_ERR_XMLDECL_NOT_FINISHED:
410 errmsg = "parsing XML declaration: '?>' expected\n";
411 break;
412 case XML_ERR_EXT_ENTITY_STANDALONE:
413 errmsg = "external parsed entities cannot be standalone\n";
414 break;
415 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
416 errmsg = "EntityRef: expecting ';'\n";
417 break;
418 case XML_ERR_DOCTYPE_NOT_FINISHED:
419 errmsg = "DOCTYPE improperly terminated\n";
420 break;
421 case XML_ERR_LTSLASH_REQUIRED:
422 errmsg = "EndTag: '</' not found\n";
423 break;
424 case XML_ERR_EQUAL_REQUIRED:
425 errmsg = "expected '='\n";
426 break;
427 case XML_ERR_STRING_NOT_CLOSED:
428 errmsg = "String not closed expecting \" or '\n";
429 break;
430 case XML_ERR_STRING_NOT_STARTED:
431 errmsg = "String not started expecting ' or \"\n";
432 break;
433 case XML_ERR_ENCODING_NAME:
434 errmsg = "Invalid XML encoding name\n";
435 break;
436 case XML_ERR_STANDALONE_VALUE:
437 errmsg = "standalone accepts only 'yes' or 'no'\n";
438 break;
439 case XML_ERR_DOCUMENT_EMPTY:
440 errmsg = "Document is empty\n";
441 break;
442 case XML_ERR_DOCUMENT_END:
443 errmsg = "Extra content at the end of the document\n";
444 break;
445 case XML_ERR_NOT_WELL_BALANCED:
446 errmsg = "chunk is not well balanced\n";
447 break;
448 case XML_ERR_EXTRA_CONTENT:
449 errmsg = "extra content at the end of well balanced chunk\n";
450 break;
451 case XML_ERR_VERSION_MISSING:
452 errmsg = "Malformed declaration expecting version\n";
453 break;
454 #if 0
455 case:
456 errmsg = "\n";
457 break;
458 #endif
459 default:
460 errmsg = "Unregistered error message\n";
461 }
462 if (ctxt != NULL)
463 ctxt->errNo = error;
464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466 info);
467 if (ctxt != NULL) {
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471 }
472 }
473
474 /**
475 * xmlFatalErrMsg:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)483 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484 const char *msg)
485 {
486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487 (ctxt->instate == XML_PARSER_EOF))
488 return;
489 if (ctxt != NULL)
490 ctxt->errNo = error;
491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
493 if (ctxt != NULL) {
494 ctxt->wellFormed = 0;
495 if (ctxt->recovery == 0)
496 ctxt->disableSAX = 1;
497 }
498 }
499
500 /**
501 * xmlWarningMsg:
502 * @ctxt: an XML parser context
503 * @error: the error number
504 * @msg: the error message
505 * @str1: extra data
506 * @str2: extra data
507 *
508 * Handle a warning.
509 */
510 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)511 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, const xmlChar *str2)
513 {
514 xmlStructuredErrorFunc schannel = NULL;
515
516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517 (ctxt->instate == XML_PARSER_EOF))
518 return;
519 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520 (ctxt->sax->initialized == XML_SAX2_MAGIC))
521 schannel = ctxt->sax->serror;
522 __xmlRaiseError(schannel,
523 (ctxt->sax) ? ctxt->sax->warning : NULL,
524 ctxt->userData,
525 ctxt, NULL, XML_FROM_PARSER, error,
526 XML_ERR_WARNING, NULL, 0,
527 (const char *) str1, (const char *) str2, NULL, 0, 0,
528 msg, (const char *) str1, (const char *) str2);
529 }
530
531 /**
532 * xmlValidityError:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @str1: extra data
537 *
538 * Handle a validity error.
539 */
540 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)541 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
542 const char *msg, const xmlChar *str1, const xmlChar *str2)
543 {
544 xmlStructuredErrorFunc schannel = NULL;
545
546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547 (ctxt->instate == XML_PARSER_EOF))
548 return;
549 if (ctxt != NULL) {
550 ctxt->errNo = error;
551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 schannel = ctxt->sax->serror;
553 }
554 __xmlRaiseError(schannel,
555 ctxt->vctxt.error, ctxt->vctxt.userData,
556 ctxt, NULL, XML_FROM_DTD, error,
557 XML_ERR_ERROR, NULL, 0, (const char *) str1,
558 (const char *) str2, NULL, 0, 0,
559 msg, (const char *) str1, (const char *) str2);
560 if (ctxt != NULL) {
561 ctxt->valid = 0;
562 }
563 }
564
565 /**
566 * xmlFatalErrMsgInt:
567 * @ctxt: an XML parser context
568 * @error: the error number
569 * @msg: the error message
570 * @val: an integer value
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
574 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)575 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
576 const char *msg, int val)
577 {
578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
581 if (ctxt != NULL)
582 ctxt->errNo = error;
583 __xmlRaiseError(NULL, NULL, NULL,
584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
586 if (ctxt != NULL) {
587 ctxt->wellFormed = 0;
588 if (ctxt->recovery == 0)
589 ctxt->disableSAX = 1;
590 }
591 }
592
593 /**
594 * xmlFatalErrMsgStrIntStr:
595 * @ctxt: an XML parser context
596 * @error: the error number
597 * @msg: the error message
598 * @str1: an string info
599 * @val: an integer value
600 * @str2: an string info
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)605 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg, const xmlChar *str1, int val,
607 const xmlChar *str2)
608 {
609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610 (ctxt->instate == XML_PARSER_EOF))
611 return;
612 if (ctxt != NULL)
613 ctxt->errNo = error;
614 __xmlRaiseError(NULL, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616 NULL, 0, (const char *) str1, (const char *) str2,
617 NULL, val, 0, msg, str1, val, str2);
618 if (ctxt != NULL) {
619 ctxt->wellFormed = 0;
620 if (ctxt->recovery == 0)
621 ctxt->disableSAX = 1;
622 }
623 }
624
625 /**
626 * xmlFatalErrMsgStr:
627 * @ctxt: an XML parser context
628 * @error: the error number
629 * @msg: the error message
630 * @val: a string value
631 *
632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633 */
634 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)635 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636 const char *msg, const xmlChar * val)
637 {
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
641 if (ctxt != NULL)
642 ctxt->errNo = error;
643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
644 XML_FROM_PARSER, error, XML_ERR_FATAL,
645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646 val);
647 if (ctxt != NULL) {
648 ctxt->wellFormed = 0;
649 if (ctxt->recovery == 0)
650 ctxt->disableSAX = 1;
651 }
652 }
653
654 /**
655 * xmlErrMsgStr:
656 * @ctxt: an XML parser context
657 * @error: the error number
658 * @msg: the error message
659 * @val: a string value
660 *
661 * Handle a non fatal parser error
662 */
663 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)664 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665 const char *msg, const xmlChar * val)
666 {
667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668 (ctxt->instate == XML_PARSER_EOF))
669 return;
670 if (ctxt != NULL)
671 ctxt->errNo = error;
672 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
673 XML_FROM_PARSER, error, XML_ERR_ERROR,
674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675 val);
676 }
677
678 /**
679 * xmlNsErr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the message
683 * @info1: extra information string
684 * @info2: extra information string
685 *
686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687 */
688 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)689 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690 const char *msg,
691 const xmlChar * info1, const xmlChar * info2,
692 const xmlChar * info3)
693 {
694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
697 if (ctxt != NULL)
698 ctxt->errNo = error;
699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
700 XML_ERR_ERROR, NULL, 0, (const char *) info1,
701 (const char *) info2, (const char *) info3, 0, 0, msg,
702 info1, info2, info3);
703 if (ctxt != NULL)
704 ctxt->nsWellFormed = 0;
705 }
706
707 /**
708 * xmlNsWarn
709 * @ctxt: an XML parser context
710 * @error: the error number
711 * @msg: the message
712 * @info1: extra information string
713 * @info2: extra information string
714 *
715 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716 */
717 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)718 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719 const char *msg,
720 const xmlChar * info1, const xmlChar * info2,
721 const xmlChar * info3)
722 {
723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724 (ctxt->instate == XML_PARSER_EOF))
725 return;
726 if (ctxt != NULL)
727 ctxt->errNo = error;
728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729 XML_ERR_WARNING, NULL, 0, (const char *) info1,
730 (const char *) info2, (const char *) info3, 0, 0, msg,
731 info1, info2, info3);
732 }
733
734 /************************************************************************
735 * *
736 * Library wide options *
737 * *
738 ************************************************************************/
739
740 /**
741 * xmlHasFeature:
742 * @feature: the feature to be examined
743 *
744 * Examines if the library has been compiled with a given feature.
745 *
746 * Returns a non-zero value if the feature exist, otherwise zero.
747 * Returns zero (0) if the feature does not exist or an unknown
748 * unknown feature is requested, non-zero otherwise.
749 */
750 int
xmlHasFeature(xmlFeature feature)751 xmlHasFeature(xmlFeature feature)
752 {
753 switch (feature) {
754 case XML_WITH_THREAD:
755 #ifdef LIBXML_THREAD_ENABLED
756 return(1);
757 #else
758 return(0);
759 #endif
760 case XML_WITH_TREE:
761 #ifdef LIBXML_TREE_ENABLED
762 return(1);
763 #else
764 return(0);
765 #endif
766 case XML_WITH_OUTPUT:
767 #ifdef LIBXML_OUTPUT_ENABLED
768 return(1);
769 #else
770 return(0);
771 #endif
772 case XML_WITH_PUSH:
773 #ifdef LIBXML_PUSH_ENABLED
774 return(1);
775 #else
776 return(0);
777 #endif
778 case XML_WITH_READER:
779 #ifdef LIBXML_READER_ENABLED
780 return(1);
781 #else
782 return(0);
783 #endif
784 case XML_WITH_PATTERN:
785 #ifdef LIBXML_PATTERN_ENABLED
786 return(1);
787 #else
788 return(0);
789 #endif
790 case XML_WITH_WRITER:
791 #ifdef LIBXML_WRITER_ENABLED
792 return(1);
793 #else
794 return(0);
795 #endif
796 case XML_WITH_SAX1:
797 #ifdef LIBXML_SAX1_ENABLED
798 return(1);
799 #else
800 return(0);
801 #endif
802 case XML_WITH_FTP:
803 #ifdef LIBXML_FTP_ENABLED
804 return(1);
805 #else
806 return(0);
807 #endif
808 case XML_WITH_HTTP:
809 #ifdef LIBXML_HTTP_ENABLED
810 return(1);
811 #else
812 return(0);
813 #endif
814 case XML_WITH_VALID:
815 #ifdef LIBXML_VALID_ENABLED
816 return(1);
817 #else
818 return(0);
819 #endif
820 case XML_WITH_HTML:
821 #ifdef LIBXML_HTML_ENABLED
822 return(1);
823 #else
824 return(0);
825 #endif
826 case XML_WITH_LEGACY:
827 #ifdef LIBXML_LEGACY_ENABLED
828 return(1);
829 #else
830 return(0);
831 #endif
832 case XML_WITH_C14N:
833 #ifdef LIBXML_C14N_ENABLED
834 return(1);
835 #else
836 return(0);
837 #endif
838 case XML_WITH_CATALOG:
839 #ifdef LIBXML_CATALOG_ENABLED
840 return(1);
841 #else
842 return(0);
843 #endif
844 case XML_WITH_XPATH:
845 #ifdef LIBXML_XPATH_ENABLED
846 return(1);
847 #else
848 return(0);
849 #endif
850 case XML_WITH_XPTR:
851 #ifdef LIBXML_XPTR_ENABLED
852 return(1);
853 #else
854 return(0);
855 #endif
856 case XML_WITH_XINCLUDE:
857 #ifdef LIBXML_XINCLUDE_ENABLED
858 return(1);
859 #else
860 return(0);
861 #endif
862 case XML_WITH_ICONV:
863 #ifdef LIBXML_ICONV_ENABLED
864 return(1);
865 #else
866 return(0);
867 #endif
868 case XML_WITH_ISO8859X:
869 #ifdef LIBXML_ISO8859X_ENABLED
870 return(1);
871 #else
872 return(0);
873 #endif
874 case XML_WITH_UNICODE:
875 #ifdef LIBXML_UNICODE_ENABLED
876 return(1);
877 #else
878 return(0);
879 #endif
880 case XML_WITH_REGEXP:
881 #ifdef LIBXML_REGEXP_ENABLED
882 return(1);
883 #else
884 return(0);
885 #endif
886 case XML_WITH_AUTOMATA:
887 #ifdef LIBXML_AUTOMATA_ENABLED
888 return(1);
889 #else
890 return(0);
891 #endif
892 case XML_WITH_EXPR:
893 #ifdef LIBXML_EXPR_ENABLED
894 return(1);
895 #else
896 return(0);
897 #endif
898 case XML_WITH_SCHEMAS:
899 #ifdef LIBXML_SCHEMAS_ENABLED
900 return(1);
901 #else
902 return(0);
903 #endif
904 case XML_WITH_SCHEMATRON:
905 #ifdef LIBXML_SCHEMATRON_ENABLED
906 return(1);
907 #else
908 return(0);
909 #endif
910 case XML_WITH_MODULES:
911 #ifdef LIBXML_MODULES_ENABLED
912 return(1);
913 #else
914 return(0);
915 #endif
916 case XML_WITH_DEBUG:
917 #ifdef LIBXML_DEBUG_ENABLED
918 return(1);
919 #else
920 return(0);
921 #endif
922 case XML_WITH_DEBUG_MEM:
923 #ifdef DEBUG_MEMORY_LOCATION
924 return(1);
925 #else
926 return(0);
927 #endif
928 case XML_WITH_DEBUG_RUN:
929 #ifdef LIBXML_DEBUG_RUNTIME
930 return(1);
931 #else
932 return(0);
933 #endif
934 case XML_WITH_ZLIB:
935 #ifdef LIBXML_ZLIB_ENABLED
936 return(1);
937 #else
938 return(0);
939 #endif
940 case XML_WITH_ICU:
941 #ifdef LIBXML_ICU_ENABLED
942 return(1);
943 #else
944 return(0);
945 #endif
946 default:
947 break;
948 }
949 return(0);
950 }
951
952 /************************************************************************
953 * *
954 * SAX2 defaulted attributes handling *
955 * *
956 ************************************************************************/
957
958 /**
959 * xmlDetectSAX2:
960 * @ctxt: an XML parser context
961 *
962 * Do the SAX2 detection and specific intialization
963 */
964 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)965 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
966 if (ctxt == NULL) return;
967 #ifdef LIBXML_SAX1_ENABLED
968 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
969 ((ctxt->sax->startElementNs != NULL) ||
970 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
971 #else
972 ctxt->sax2 = 1;
973 #endif /* LIBXML_SAX1_ENABLED */
974
975 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
976 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
977 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
978 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
979 (ctxt->str_xml_ns == NULL)) {
980 xmlErrMemory(ctxt, NULL);
981 }
982 }
983
/*
 * Set of defaulted attributes attached to one element.
 *
 * The values array is declared with the slots of a single attribute; the
 * allocation done in xmlAddDefAttrs() reserves additional trailing room,
 * counted as 5 pointers per attribute, for maxAttrs attributes.
 */
typedef struct _xmlDefAttrs {
    int nbAttrs;               /* number of defaulted attributes on that element */
    int maxAttrs;              /* the size of the array */
    const xmlChar *values[5];  /* array of localname/prefix/values/external */
} xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
991
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve dst (and it doesn't come from a dictionary or
 * read-only) then passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if the normalization was done in place
 * (@src == @dst) and nothing had to be removed.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* Write through a cursor so that the start of dst can be returned. */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; emit nothing if it ends the string */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * Both cursors meet only when working in place on a value that was
     * already normalized.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1030
1031 /**
1032 * xmlAttrNormalizeSpace2:
1033 * @src: the source string
1034 *
1035 * Normalize the space in non CDATA attribute values, a slightly more complex
1036 * front end to avoid allocation problems when running on attribute values
1037 * coming from the input.
1038 *
1039 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1040 * is needed.
1041 */
1042 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1043 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1044 {
1045 int i;
1046 int remove_head = 0;
1047 int need_realloc = 0;
1048 const xmlChar *cur;
1049
1050 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1051 return(NULL);
1052 i = *len;
1053 if (i <= 0)
1054 return(NULL);
1055
1056 cur = src;
1057 while (*cur == 0x20) {
1058 cur++;
1059 remove_head++;
1060 }
1061 while (*cur != 0) {
1062 if (*cur == 0x20) {
1063 cur++;
1064 if ((*cur == 0x20) || (*cur == 0)) {
1065 need_realloc = 1;
1066 break;
1067 }
1068 } else
1069 cur++;
1070 }
1071 if (need_realloc) {
1072 xmlChar *ret;
1073
1074 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1075 if (ret == NULL) {
1076 xmlErrMemory(ctxt, NULL);
1077 return(NULL);
1078 }
1079 xmlAttrNormalizeSpace(ret, ret);
1080 *len = (int) strlen((const char *)ret);
1081 return(ret);
1082 } else if (remove_head) {
1083 *len -= remove_head;
1084 memmove(src, src + remove_head, 1 + *len);
1085 return(src);
1086 }
1087 return(NULL);
1088 }
1089
1090 /**
1091 * xmlAddDefAttrs:
1092 * @ctxt: an XML parser context
1093 * @fullname: the element fullname
1094 * @fullattr: the attribute fullname
1095 * @value: the attribute value
1096 *
1097 * Add a defaulted attribute for an element
1098 */
1099 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1100 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1101 const xmlChar *fullname,
1102 const xmlChar *fullattr,
1103 const xmlChar *value) {
1104 xmlDefAttrsPtr defaults;
1105 int len;
1106 const xmlChar *name;
1107 const xmlChar *prefix;
1108
1109 /*
1110 * Allows to detect attribute redefinitions
1111 */
1112 if (ctxt->attsSpecial != NULL) {
1113 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1114 return;
1115 }
1116
1117 if (ctxt->attsDefault == NULL) {
1118 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1119 if (ctxt->attsDefault == NULL)
1120 goto mem_error;
1121 }
1122
1123 /*
1124 * split the element name into prefix:localname , the string found
1125 * are within the DTD and then not associated to namespace names.
1126 */
1127 name = xmlSplitQName3(fullname, &len);
1128 if (name == NULL) {
1129 name = xmlDictLookup(ctxt->dict, fullname, -1);
1130 prefix = NULL;
1131 } else {
1132 name = xmlDictLookup(ctxt->dict, name, -1);
1133 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1134 }
1135
1136 /*
1137 * make sure there is some storage
1138 */
1139 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1140 if (defaults == NULL) {
1141 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1142 (4 * 5) * sizeof(const xmlChar *));
1143 if (defaults == NULL)
1144 goto mem_error;
1145 defaults->nbAttrs = 0;
1146 defaults->maxAttrs = 4;
1147 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1148 defaults, NULL) < 0) {
1149 xmlFree(defaults);
1150 goto mem_error;
1151 }
1152 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1153 xmlDefAttrsPtr temp;
1154
1155 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1156 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1157 if (temp == NULL)
1158 goto mem_error;
1159 defaults = temp;
1160 defaults->maxAttrs *= 2;
1161 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1162 defaults, NULL) < 0) {
1163 xmlFree(defaults);
1164 goto mem_error;
1165 }
1166 }
1167
1168 /*
1169 * Split the element name into prefix:localname , the string found
1170 * are within the DTD and hen not associated to namespace names.
1171 */
1172 name = xmlSplitQName3(fullattr, &len);
1173 if (name == NULL) {
1174 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1175 prefix = NULL;
1176 } else {
1177 name = xmlDictLookup(ctxt->dict, name, -1);
1178 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1179 }
1180
1181 defaults->values[5 * defaults->nbAttrs] = name;
1182 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1183 /* intern the string and precompute the end */
1184 len = xmlStrlen(value);
1185 value = xmlDictLookup(ctxt->dict, value, len);
1186 defaults->values[5 * defaults->nbAttrs + 2] = value;
1187 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1188 if (ctxt->external)
1189 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1190 else
1191 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1192 defaults->nbAttrs++;
1193
1194 return;
1195
1196 mem_error:
1197 xmlErrMemory(ctxt, NULL);
1198 return;
1199 }
1200
1201 /**
1202 * xmlAddSpecialAttr:
1203 * @ctxt: an XML parser context
1204 * @fullname: the element fullname
1205 * @fullattr: the attribute fullname
1206 * @type: the attribute type
1207 *
1208 * Register this attribute type
1209 */
1210 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1211 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1212 const xmlChar *fullname,
1213 const xmlChar *fullattr,
1214 int type)
1215 {
1216 if (ctxt->attsSpecial == NULL) {
1217 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1218 if (ctxt->attsSpecial == NULL)
1219 goto mem_error;
1220 }
1221
1222 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1223 return;
1224
1225 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1226 (void *) (long) type);
1227 return;
1228
1229 mem_error:
1230 xmlErrMemory(ctxt, NULL);
1231 return;
1232 }
1233
1234 /**
1235 * xmlCleanSpecialAttrCallback:
1236 *
1237 * Removes CDATA attributes from the special attribute table
1238 */
1239 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1240 xmlCleanSpecialAttrCallback(void *payload, void *data,
1241 const xmlChar *fullname, const xmlChar *fullattr,
1242 const xmlChar *unused ATTRIBUTE_UNUSED) {
1243 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1244
1245 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1246 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1247 }
1248 }
1249
1250 /**
1251 * xmlCleanSpecialAttr:
1252 * @ctxt: an XML parser context
1253 *
1254 * Trim the list of attributes defined to remove all those of type
1255 * CDATA as they are not special. This call should be done when finishing
1256 * to parse the DTD and before starting to parse the document root.
1257 */
1258 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1259 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1260 {
1261 if (ctxt->attsSpecial == NULL)
1262 return;
1263
1264 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1265
1266 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1267 xmlHashFree(ctxt->attsSpecial, NULL);
1268 ctxt->attsSpecial = NULL;
1269 }
1270 return;
1271 }
1272
1273 /**
1274 * xmlCheckLanguageID:
1275 * @lang: pointer to the string value
1276 *
1277 * Checks that the value conforms to the LanguageID production:
1278 *
1279 * NOTE: this is somewhat deprecated, those productions were removed from
1280 * the XML Second edition.
1281 *
1282 * [33] LanguageID ::= Langcode ('-' Subcode)*
1283 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1284 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1285 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1286 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1287 * [38] Subcode ::= ([a-z] | [A-Z])+
1288 *
1289 * Returns 1 if correct 0 otherwise
1290 **/
1291 int
xmlCheckLanguageID(const xmlChar * lang)1292 xmlCheckLanguageID(const xmlChar * lang)
1293 {
1294 const xmlChar *cur = lang;
1295
1296 if (cur == NULL)
1297 return (0);
1298 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1299 ((cur[0] == 'I') && (cur[1] == '-'))) {
1300 /*
1301 * IANA code
1302 */
1303 cur += 2;
1304 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1305 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1306 cur++;
1307 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1308 ((cur[0] == 'X') && (cur[1] == '-'))) {
1309 /*
1310 * User code
1311 */
1312 cur += 2;
1313 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1314 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1315 cur++;
1316 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1318 /*
1319 * ISO639
1320 */
1321 cur++;
1322 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1323 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1324 cur++;
1325 else
1326 return (0);
1327 } else
1328 return (0);
1329 while (cur[0] != 0) { /* non input consuming */
1330 if (cur[0] != '-')
1331 return (0);
1332 cur++;
1333 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1335 cur++;
1336 else
1337 return (0);
1338 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1339 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1340 cur++;
1341 }
1342 return (1);
1343 }
1344
1345 /************************************************************************
1346 * *
1347 * Parser stacks related functions and macros *
1348 * *
1349 ************************************************************************/
1350
1351 xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1352 const xmlChar ** str);
1353
1354 #ifdef SAX2
1355 /**
1356 * nsPush:
1357 * @ctxt: an XML parser context
1358 * @prefix: the namespace prefix or NULL
1359 * @URL: the namespace name
1360 *
1361 * Pushes a new parser namespace on top of the ns stack
1362 *
1363 * Returns -1 in case of error, -2 if the namespace should be discarded
1364 * and the index in the stack otherwise.
1365 */
1366 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1367 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1368 {
1369 if (ctxt->options & XML_PARSE_NSCLEAN) {
1370 int i;
1371 for (i = 0;i < ctxt->nsNr;i += 2) {
1372 if (ctxt->nsTab[i] == prefix) {
1373 /* in scope */
1374 if (ctxt->nsTab[i + 1] == URL)
1375 return(-2);
1376 /* out of scope keep it */
1377 break;
1378 }
1379 }
1380 }
1381 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1382 ctxt->nsMax = 10;
1383 ctxt->nsNr = 0;
1384 ctxt->nsTab = (const xmlChar **)
1385 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1386 if (ctxt->nsTab == NULL) {
1387 xmlErrMemory(ctxt, NULL);
1388 ctxt->nsMax = 0;
1389 return (-1);
1390 }
1391 } else if (ctxt->nsNr >= ctxt->nsMax) {
1392 const xmlChar ** tmp;
1393 ctxt->nsMax *= 2;
1394 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1395 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1396 if (tmp == NULL) {
1397 xmlErrMemory(ctxt, NULL);
1398 ctxt->nsMax /= 2;
1399 return (-1);
1400 }
1401 ctxt->nsTab = tmp;
1402 }
1403 ctxt->nsTab[ctxt->nsNr++] = prefix;
1404 ctxt->nsTab[ctxt->nsNr++] = URL;
1405 return (ctxt->nsNr);
1406 }
1407 /**
1408 * nsPop:
1409 * @ctxt: an XML parser context
1410 * @nr: the number to pop
1411 *
1412 * Pops the top @nr parser prefix/namespace from the ns stack
1413 *
1414 * Returns the number of namespaces removed
1415 */
1416 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1417 nsPop(xmlParserCtxtPtr ctxt, int nr)
1418 {
1419 int i;
1420
1421 if (ctxt->nsTab == NULL) return(0);
1422 if (ctxt->nsNr < nr) {
1423 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1424 nr = ctxt->nsNr;
1425 }
1426 if (ctxt->nsNr <= 0)
1427 return (0);
1428
1429 for (i = 0;i < nr;i++) {
1430 ctxt->nsNr--;
1431 ctxt->nsTab[ctxt->nsNr] = NULL;
1432 }
1433 return(nr);
1434 }
1435 #endif
1436
1437 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1438 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1439 const xmlChar **atts;
1440 int *attallocs;
1441 int maxatts;
1442
1443 if (ctxt->atts == NULL) {
1444 maxatts = 55; /* allow for 10 attrs by default */
1445 atts = (const xmlChar **)
1446 xmlMalloc(maxatts * sizeof(xmlChar *));
1447 if (atts == NULL) goto mem_error;
1448 ctxt->atts = atts;
1449 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1450 if (attallocs == NULL) goto mem_error;
1451 ctxt->attallocs = attallocs;
1452 ctxt->maxatts = maxatts;
1453 } else if (nr + 5 > ctxt->maxatts) {
1454 maxatts = (nr + 5) * 2;
1455 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1456 maxatts * sizeof(const xmlChar *));
1457 if (atts == NULL) goto mem_error;
1458 ctxt->atts = atts;
1459 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1460 (maxatts / 5) * sizeof(int));
1461 if (attallocs == NULL) goto mem_error;
1462 ctxt->attallocs = attallocs;
1463 ctxt->maxatts = maxatts;
1464 }
1465 return(ctxt->maxatts);
1466 mem_error:
1467 xmlErrMemory(ctxt, NULL);
1468 return(-1);
1469 }
1470
1471 /**
1472 * inputPush:
1473 * @ctxt: an XML parser context
1474 * @value: the parser input
1475 *
1476 * Pushes a new parser input on top of the input stack
1477 *
1478 * Returns -1 in case of error, the index in the stack otherwise
1479 */
1480 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1481 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1482 {
1483 if ((ctxt == NULL) || (value == NULL))
1484 return(-1);
1485 if (ctxt->inputNr >= ctxt->inputMax) {
1486 ctxt->inputMax *= 2;
1487 ctxt->inputTab =
1488 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1489 ctxt->inputMax *
1490 sizeof(ctxt->inputTab[0]));
1491 if (ctxt->inputTab == NULL) {
1492 xmlErrMemory(ctxt, NULL);
1493 xmlFreeInputStream(value);
1494 ctxt->inputMax /= 2;
1495 value = NULL;
1496 return (-1);
1497 }
1498 }
1499 ctxt->inputTab[ctxt->inputNr] = value;
1500 ctxt->input = value;
1501 return (ctxt->inputNr++);
1502 }
1503 /**
1504 * inputPop:
1505 * @ctxt: an XML parser context
1506 *
1507 * Pops the top parser input from the input stack
1508 *
1509 * Returns the input just removed
1510 */
1511 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1512 inputPop(xmlParserCtxtPtr ctxt)
1513 {
1514 xmlParserInputPtr ret;
1515
1516 if (ctxt == NULL)
1517 return(NULL);
1518 if (ctxt->inputNr <= 0)
1519 return (NULL);
1520 ctxt->inputNr--;
1521 if (ctxt->inputNr > 0)
1522 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1523 else
1524 ctxt->input = NULL;
1525 ret = ctxt->inputTab[ctxt->inputNr];
1526 ctxt->inputTab[ctxt->inputNr] = NULL;
1527 return (ret);
1528 }
1529 /**
1530 * nodePush:
1531 * @ctxt: an XML parser context
1532 * @value: the element node
1533 *
1534 * Pushes a new element node on top of the node stack
1535 *
1536 * Returns -1 in case of error, the index in the stack otherwise
1537 */
1538 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1539 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1540 {
1541 if (ctxt == NULL) return(0);
1542 if (ctxt->nodeNr >= ctxt->nodeMax) {
1543 xmlNodePtr *tmp;
1544
1545 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1546 ctxt->nodeMax * 2 *
1547 sizeof(ctxt->nodeTab[0]));
1548 if (tmp == NULL) {
1549 xmlErrMemory(ctxt, NULL);
1550 return (-1);
1551 }
1552 ctxt->nodeTab = tmp;
1553 ctxt->nodeMax *= 2;
1554 }
1555 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1557 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1558 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1559 xmlParserMaxDepth);
1560 ctxt->instate = XML_PARSER_EOF;
1561 return(-1);
1562 }
1563 ctxt->nodeTab[ctxt->nodeNr] = value;
1564 ctxt->node = value;
1565 return (ctxt->nodeNr++);
1566 }
1567
1568 /**
1569 * nodePop:
1570 * @ctxt: an XML parser context
1571 *
1572 * Pops the top element node from the node stack
1573 *
1574 * Returns the node just removed
1575 */
1576 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1577 nodePop(xmlParserCtxtPtr ctxt)
1578 {
1579 xmlNodePtr ret;
1580
1581 if (ctxt == NULL) return(NULL);
1582 if (ctxt->nodeNr <= 0)
1583 return (NULL);
1584 ctxt->nodeNr--;
1585 if (ctxt->nodeNr > 0)
1586 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1587 else
1588 ctxt->node = NULL;
1589 ret = ctxt->nodeTab[ctxt->nodeNr];
1590 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1591 return (ret);
1592 }
1593
1594 #ifdef LIBXML_PUSH_ENABLED
1595 /**
1596 * nameNsPush:
1597 * @ctxt: an XML parser context
1598 * @value: the element name
1599 * @prefix: the element prefix
1600 * @URI: the element namespace name
1601 *
1602 * Pushes a new element name/prefix/URL on top of the name stack
1603 *
1604 * Returns -1 in case of error, the index in the stack otherwise
1605 */
1606 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1607 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1608 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1609 {
1610 if (ctxt->nameNr >= ctxt->nameMax) {
1611 const xmlChar * *tmp;
1612 void **tmp2;
1613 ctxt->nameMax *= 2;
1614 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1615 ctxt->nameMax *
1616 sizeof(ctxt->nameTab[0]));
1617 if (tmp == NULL) {
1618 ctxt->nameMax /= 2;
1619 goto mem_error;
1620 }
1621 ctxt->nameTab = tmp;
1622 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1623 ctxt->nameMax * 3 *
1624 sizeof(ctxt->pushTab[0]));
1625 if (tmp2 == NULL) {
1626 ctxt->nameMax /= 2;
1627 goto mem_error;
1628 }
1629 ctxt->pushTab = tmp2;
1630 }
1631 ctxt->nameTab[ctxt->nameNr] = value;
1632 ctxt->name = value;
1633 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1634 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1635 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1636 return (ctxt->nameNr++);
1637 mem_error:
1638 xmlErrMemory(ctxt, NULL);
1639 return (-1);
1640 }
1641 /**
1642 * nameNsPop:
1643 * @ctxt: an XML parser context
1644 *
1645 * Pops the top element/prefix/URI name from the name stack
1646 *
1647 * Returns the name just removed
1648 */
1649 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1650 nameNsPop(xmlParserCtxtPtr ctxt)
1651 {
1652 const xmlChar *ret;
1653
1654 if (ctxt->nameNr <= 0)
1655 return (NULL);
1656 ctxt->nameNr--;
1657 if (ctxt->nameNr > 0)
1658 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1659 else
1660 ctxt->name = NULL;
1661 ret = ctxt->nameTab[ctxt->nameNr];
1662 ctxt->nameTab[ctxt->nameNr] = NULL;
1663 return (ret);
1664 }
1665 #endif /* LIBXML_PUSH_ENABLED */
1666
1667 /**
1668 * namePush:
1669 * @ctxt: an XML parser context
1670 * @value: the element name
1671 *
1672 * Pushes a new element name on top of the name stack
1673 *
1674 * Returns -1 in case of error, the index in the stack otherwise
1675 */
1676 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1677 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1678 {
1679 if (ctxt == NULL) return (-1);
1680
1681 if (ctxt->nameNr >= ctxt->nameMax) {
1682 const xmlChar * *tmp;
1683 ctxt->nameMax *= 2;
1684 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1685 ctxt->nameMax *
1686 sizeof(ctxt->nameTab[0]));
1687 if (tmp == NULL) {
1688 ctxt->nameMax /= 2;
1689 goto mem_error;
1690 }
1691 ctxt->nameTab = tmp;
1692 }
1693 ctxt->nameTab[ctxt->nameNr] = value;
1694 ctxt->name = value;
1695 return (ctxt->nameNr++);
1696 mem_error:
1697 xmlErrMemory(ctxt, NULL);
1698 return (-1);
1699 }
1700 /**
1701 * namePop:
1702 * @ctxt: an XML parser context
1703 *
1704 * Pops the top element name from the name stack
1705 *
1706 * Returns the name just removed
1707 */
1708 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1709 namePop(xmlParserCtxtPtr ctxt)
1710 {
1711 const xmlChar *ret;
1712
1713 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1714 return (NULL);
1715 ctxt->nameNr--;
1716 if (ctxt->nameNr > 0)
1717 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1718 else
1719 ctxt->name = NULL;
1720 ret = ctxt->nameTab[ctxt->nameNr];
1721 ctxt->nameTab[ctxt->nameNr] = NULL;
1722 return (ret);
1723 }
1724
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack, doubling the stack when it is
 * full, and makes ctxt->space point to the new top.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int doubled = ctxt->spaceMax * 2;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = doubled;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1743
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * new top, or to the first slot when the stack becomes empty, and the
 * released slot is reset to -1.
 *
 * Returns the value just removed, 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1756
1757 /*
1758 * Macros for accessing the content. Those should be used only by the parser,
1759 * and not exported.
1760 *
1761 * Dirty macros, i.e. one often need to make assumption on the context to
1762 * use them
1763 *
1764 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1765 * To be used with extreme caution since operations consuming
1766 * characters may move the input buffer to a different location !
1767 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1768 * This should be used internally by the parser
1769 * only to compare to ASCII values otherwise it would break when
1770 * running with UTF-8 encoding.
1771 * RAW same as CUR but in the input buffer, bypass any token
1772 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
1779 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1780 *
1781 * NEXT Skip to the next character, this does the proper decoding
1782 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1783 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1784 * CUR_CHAR(l) returns the current unicode character (int), set l
1785 * to the number of xmlChars used for the encoding [0-5].
1786 * CUR_SCHAR same but operate on a string instead of the context
1787 * COPY_BUF copy the current unicode char to the target buffer, increment
1788 * the index
1789 * GROW, SHRINK handling of input buffers
1790 */
1791
1792 #define RAW (*ctxt->input->cur)
1793 #define CUR (*ctxt->input->cur)
1794 #define NXT(val) ctxt->input->cur[(val)]
1795 #define CUR_PTR ctxt->input->cur
1796
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * c1 ... cn. The bytes are compared left to right and the comparison
 * stops at the first mismatch. Every argument is parenthesized so that
 * any expression can be passed, and the cast keeps the const qualifier.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1814
1815 #define SKIP(val) do { \
1816 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
1817 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1818 if ((*ctxt->input->cur == 0) && \
1819 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1820 xmlPopInput(ctxt); \
1821 } while (0)
1822
/*
 * SKIPL(val): skip val xmlChars one at a time, keeping the line and
 * column counters up to date, then handle a PE reference or the end of
 * the current input like SKIP does. The argument is parenthesized so
 * that any expression can be passed; it is still evaluated on every
 * iteration of the loop.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1837
1838 #define SHRINK if ((ctxt->progressive == 0) && \
1839 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1840 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1841 xmlSHRINK (ctxt);
1842
/*
 * xmlSHRINK: out of line part of the SHRINK macro. Shrinks the current
 * input, then pops it if it is exhausted and cannot be refilled.
 */
static void
xmlSHRINK(xmlParserCtxtPtr ctxt)
{
    xmlParserInputShrink(ctxt->input);
    if (*ctxt->input->cur != 0)
        return;
    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) > 0)
        return;
    xmlPopInput(ctxt);
}
1849
1850 #define GROW if ((ctxt->progressive == 0) && \
1851 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1852 xmlGROW (ctxt);
1853
/*
 * xmlGROW: out of line part of the GROW macro. Grows the current input,
 * then pops it if it is exhausted and a second attempt brings nothing.
 */
static void
xmlGROW(xmlParserCtxtPtr ctxt)
{
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if (*ctxt->input->cur != 0)
        return;
    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) > 0)
        return;
    xmlPopInput(ctxt);
}
1860
1861 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1862
1863 #define NEXT xmlNextChar(ctxt)
1864
1865 #define NEXT1 { \
1866 ctxt->input->col++; \
1867 ctxt->input->cur++; \
1868 ctxt->nbChars++; \
1869 if (*ctxt->input->cur == 0) \
1870 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1871 }
1872
1873 #define NEXTL(l) do { \
1874 if (*(ctxt->input->cur) == '\n') { \
1875 ctxt->input->line++; ctxt->input->col = 1; \
1876 } else ctxt->input->col++; \
1877 ctxt->input->cur += l; \
1878 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1879 } while (0)
1880
1881 #define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1882 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1883
1884 #define COPY_BUF(l,b,i,v) \
1885 if (l == 1) b[i++] = (xmlChar) v; \
1886 else i += xmlCopyCharMultiByte(&b[i],v)
1887
1888 /**
1889 * xmlSkipBlankChars:
1890 * @ctxt: the XML parser context
1891 *
1892 * skip all blanks character found at that point in the input streams.
1893 * It pops up finished entities in the process if allowable at that point.
1894 *
1895 * Returns the number of space chars skipped
1896 */
1897
1898 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)1899 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1900 int res = 0;
1901
1902 /*
1903 * It's Okay to use CUR/NEXT here since all the blanks are on
1904 * the ASCII range.
1905 */
1906 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1907 const xmlChar *cur;
1908 /*
1909 * if we are in the document content, go really fast
1910 */
1911 cur = ctxt->input->cur;
1912 while (IS_BLANK_CH(*cur)) {
1913 if (*cur == '\n') {
1914 ctxt->input->line++; ctxt->input->col = 1;
1915 }
1916 cur++;
1917 res++;
1918 if (*cur == 0) {
1919 ctxt->input->cur = cur;
1920 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1921 cur = ctxt->input->cur;
1922 }
1923 }
1924 ctxt->input->cur = cur;
1925 } else {
1926 int cur;
1927 do {
1928 cur = CUR;
1929 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1930 NEXT;
1931 cur = CUR;
1932 res++;
1933 }
1934 while ((cur == 0) && (ctxt->inputNr > 1) &&
1935 (ctxt->instate != XML_PARSER_COMMENT)) {
1936 xmlPopInput(ctxt);
1937 cur = CUR;
1938 }
1939 /*
1940 * Need to handle support of entities branching here
1941 */
1942 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1943 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1944 }
1945 return(res);
1946 }
1947
1948 /************************************************************************
1949 * *
1950 * Commodity functions to handle entities *
1951 * *
1952 ************************************************************************/
1953
1954 /**
1955 * xmlPopInput:
1956 * @ctxt: an XML parser context
1957 *
1958 * xmlPopInput: the current input pointed by ctxt->input came to an end
1959 * pop it and return the next char.
1960 *
1961 * Returns the current xmlChar in the parser context
1962 */
1963 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)1964 xmlPopInput(xmlParserCtxtPtr ctxt) {
1965 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1966 if (xmlParserDebugEntities)
1967 xmlGenericError(xmlGenericErrorContext,
1968 "Popping input %d\n", ctxt->inputNr);
1969 xmlFreeInputStream(inputPop(ctxt));
1970 if ((*ctxt->input->cur == 0) &&
1971 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1972 return(xmlPopInput(ctxt));
1973 return(CUR);
1974 }
1975
1976 /**
1977 * xmlPushInput:
1978 * @ctxt: an XML parser context
1979 * @input: an XML parser input fragment (entity, XML fragment ...).
1980 *
1981 * xmlPushInput: switch to a new input stream which is stacked on top
1982 * of the previous one(s).
1983 * Returns -1 in case of error or the index in the input stack
1984 */
1985 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)1986 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1987 int ret;
1988 if (input == NULL) return(-1);
1989
1990 if (xmlParserDebugEntities) {
1991 if ((ctxt->input != NULL) && (ctxt->input->filename))
1992 xmlGenericError(xmlGenericErrorContext,
1993 "%s(%d): ", ctxt->input->filename,
1994 ctxt->input->line);
1995 xmlGenericError(xmlGenericErrorContext,
1996 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1997 }
1998 ret = inputPush(ctxt, input);
1999 GROW;
2000 return(ret);
2001 }
2002
2003 /**
2004 * xmlParseCharRef:
2005 * @ctxt: an XML parser context
2006 *
2007 * parse Reference declarations
2008 *
2009 * [66] CharRef ::= '&#' [0-9]+ ';' |
2010 * '&#x' [0-9a-fA-F]+ ';'
2011 *
2012 * [ WFC: Legal Character ]
2013 * Characters referred to using character references must match the
2014 * production for Char.
2015 *
2016 * Returns the value parsed (as an int), 0 in case of error
2017 */
2018 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2019 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2020 unsigned int val = 0;
2021 int count = 0;
2022 unsigned int outofrange = 0;
2023
2024 /*
2025 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2026 */
2027 if ((RAW == '&') && (NXT(1) == '#') &&
2028 (NXT(2) == 'x')) {
2029 SKIP(3);
2030 GROW;
2031 while (RAW != ';') { /* loop blocked by count */
2032 if (count++ > 20) {
2033 count = 0;
2034 GROW;
2035 }
2036 if ((RAW >= '0') && (RAW <= '9'))
2037 val = val * 16 + (CUR - '0');
2038 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2039 val = val * 16 + (CUR - 'a') + 10;
2040 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2041 val = val * 16 + (CUR - 'A') + 10;
2042 else {
2043 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2044 val = 0;
2045 break;
2046 }
2047 if (val > 0x10FFFF)
2048 outofrange = val;
2049
2050 NEXT;
2051 count++;
2052 }
2053 if (RAW == ';') {
2054 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2055 ctxt->input->col++;
2056 ctxt->nbChars ++;
2057 ctxt->input->cur++;
2058 }
2059 } else if ((RAW == '&') && (NXT(1) == '#')) {
2060 SKIP(2);
2061 GROW;
2062 while (RAW != ';') { /* loop blocked by count */
2063 if (count++ > 20) {
2064 count = 0;
2065 GROW;
2066 }
2067 if ((RAW >= '0') && (RAW <= '9'))
2068 val = val * 10 + (CUR - '0');
2069 else {
2070 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2071 val = 0;
2072 break;
2073 }
2074 if (val > 0x10FFFF)
2075 outofrange = val;
2076
2077 NEXT;
2078 count++;
2079 }
2080 if (RAW == ';') {
2081 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2082 ctxt->input->col++;
2083 ctxt->nbChars ++;
2084 ctxt->input->cur++;
2085 }
2086 } else {
2087 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2088 }
2089
2090 /*
2091 * [ WFC: Legal Character ]
2092 * Characters referred to using character references must match the
2093 * production for Char.
2094 */
2095 if ((IS_CHAR(val) && (outofrange == 0))) {
2096 return(val);
2097 } else {
2098 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2099 "xmlParseCharRef: invalid xmlChar value %d\n",
2100 val);
2101 }
2102 return(0);
2103 }
2104
2105 /**
2106 * xmlParseStringCharRef:
2107 * @ctxt: an XML parser context
2108 * @str: a pointer to an index in the string
2109 *
2110 * parse Reference declarations, variant parsing from a string rather
2111 * than an an input flow.
2112 *
2113 * [66] CharRef ::= '&#' [0-9]+ ';' |
2114 * '&#x' [0-9a-fA-F]+ ';'
2115 *
2116 * [ WFC: Legal Character ]
2117 * Characters referred to using character references must match the
2118 * production for Char.
2119 *
2120 * Returns the value parsed (as an int), 0 in case of error, str will be
2121 * updated to the current value of the index
2122 */
2123 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2124 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2125 const xmlChar *ptr;
2126 xmlChar cur;
2127 unsigned int val = 0;
2128 unsigned int outofrange = 0;
2129
2130 if ((str == NULL) || (*str == NULL)) return(0);
2131 ptr = *str;
2132 cur = *ptr;
2133 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2134 ptr += 3;
2135 cur = *ptr;
2136 while (cur != ';') { /* Non input consuming loop */
2137 if ((cur >= '0') && (cur <= '9'))
2138 val = val * 16 + (cur - '0');
2139 else if ((cur >= 'a') && (cur <= 'f'))
2140 val = val * 16 + (cur - 'a') + 10;
2141 else if ((cur >= 'A') && (cur <= 'F'))
2142 val = val * 16 + (cur - 'A') + 10;
2143 else {
2144 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2145 val = 0;
2146 break;
2147 }
2148 if (val > 0x10FFFF)
2149 outofrange = val;
2150
2151 ptr++;
2152 cur = *ptr;
2153 }
2154 if (cur == ';')
2155 ptr++;
2156 } else if ((cur == '&') && (ptr[1] == '#')){
2157 ptr += 2;
2158 cur = *ptr;
2159 while (cur != ';') { /* Non input consuming loops */
2160 if ((cur >= '0') && (cur <= '9'))
2161 val = val * 10 + (cur - '0');
2162 else {
2163 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2164 val = 0;
2165 break;
2166 }
2167 if (val > 0x10FFFF)
2168 outofrange = val;
2169
2170 ptr++;
2171 cur = *ptr;
2172 }
2173 if (cur == ';')
2174 ptr++;
2175 } else {
2176 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2177 return(0);
2178 }
2179 *str = ptr;
2180
2181 /*
2182 * [ WFC: Legal Character ]
2183 * Characters referred to using character references must match the
2184 * production for Char.
2185 */
2186 if ((IS_CHAR(val) && (outofrange == 0))) {
2187 return(val);
2188 } else {
2189 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2190 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2191 val);
2192 }
2193 return(0);
2194 }
2195
2196 /**
2197 * xmlNewBlanksWrapperInputStream:
2198 * @ctxt: an XML parser context
2199 * @entity: an Entity pointer
2200 *
2201 * Create a new input stream for wrapping
2202 * blanks around a PEReference
2203 *
2204 * Returns the new input stream or NULL
2205 */
2206
/*
 * deallocblankswrapper:
 * @str:  the text buffer to release
 *
 * Deallocation callback that hands the buffer back to xmlFree().
 */
static void
deallocblankswrapper(xmlChar *str)
{
    xmlFree(str);
}
2208
2209 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2210 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2211 xmlParserInputPtr input;
2212 xmlChar *buffer;
2213 size_t length;
2214 if (entity == NULL) {
2215 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2216 "xmlNewBlanksWrapperInputStream entity\n");
2217 return(NULL);
2218 }
2219 if (xmlParserDebugEntities)
2220 xmlGenericError(xmlGenericErrorContext,
2221 "new blanks wrapper for entity: %s\n", entity->name);
2222 input = xmlNewInputStream(ctxt);
2223 if (input == NULL) {
2224 return(NULL);
2225 }
2226 length = xmlStrlen(entity->name) + 5;
2227 buffer = xmlMallocAtomic(length);
2228 if (buffer == NULL) {
2229 xmlErrMemory(ctxt, NULL);
2230 xmlFree(input);
2231 return(NULL);
2232 }
2233 buffer [0] = ' ';
2234 buffer [1] = '%';
2235 buffer [length-3] = ';';
2236 buffer [length-2] = ' ';
2237 buffer [length-1] = 0;
2238 memcpy(buffer + 2, entity->name, length - 5);
2239 input->free = deallocblankswrapper;
2240 input->base = buffer;
2241 input->cur = buffer;
2242 input->length = length;
2243 input->end = &buffer[length];
2244 return(input);
2245 }
2246
2247 /**
2248 * xmlParserHandlePEReference:
2249 * @ctxt: the parser context
2250 *
2251 * [69] PEReference ::= '%' Name ';'
2252 *
2253 * [ WFC: No Recursion ]
2254 * A parsed entity must not contain a recursive
2255 * reference to itself, either directly or indirectly.
2256 *
2257 * [ WFC: Entity Declared ]
2258 * In a document without any DTD, a document with only an internal DTD
2259 * subset which contains no parameter entity references, or a document
2260 * with "standalone='yes'", ... ... The declaration of a parameter
2261 * entity must precede any reference to it...
2262 *
2263 * [ VC: Entity Declared ]
2264 * In a document with an external subset or external parameter entities
2265 * with "standalone='no'", ... ... The declaration of a parameter entity
2266 * must precede any reference to it...
2267 *
2268 * [ WFC: In DTD ]
2269 * Parameter-entity references may only appear in the DTD.
2270 * NOTE: misleading but this is handled.
2271 *
2272 * A PEReference may have been detected in the current input stream
2273 * the handling is done accordingly to
2274 * http://www.w3.org/TR/REC-xml#entproc
2275 * i.e.
2276 * - Included in literal in entity values
2277 * - Included as Parameter Entity reference within DTDs
2278 */
2279 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2280 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2281 const xmlChar *name;
2282 xmlEntityPtr entity = NULL;
2283 xmlParserInputPtr input;
2284
2285 if (RAW != '%') return;
2286 switch(ctxt->instate) {
2287 case XML_PARSER_CDATA_SECTION:
2288 return;
2289 case XML_PARSER_COMMENT:
2290 return;
2291 case XML_PARSER_START_TAG:
2292 return;
2293 case XML_PARSER_END_TAG:
2294 return;
2295 case XML_PARSER_EOF:
2296 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2297 return;
2298 case XML_PARSER_PROLOG:
2299 case XML_PARSER_START:
2300 case XML_PARSER_MISC:
2301 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2302 return;
2303 case XML_PARSER_ENTITY_DECL:
2304 case XML_PARSER_CONTENT:
2305 case XML_PARSER_ATTRIBUTE_VALUE:
2306 case XML_PARSER_PI:
2307 case XML_PARSER_SYSTEM_LITERAL:
2308 case XML_PARSER_PUBLIC_LITERAL:
2309 /* we just ignore it there */
2310 return;
2311 case XML_PARSER_EPILOG:
2312 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2313 return;
2314 case XML_PARSER_ENTITY_VALUE:
2315 /*
2316 * NOTE: in the case of entity values, we don't do the
2317 * substitution here since we need the literal
2318 * entity value to be able to save the internal
2319 * subset of the document.
2320 * This will be handled by xmlStringDecodeEntities
2321 */
2322 return;
2323 case XML_PARSER_DTD:
2324 /*
2325 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2326 * In the internal DTD subset, parameter-entity references
2327 * can occur only where markup declarations can occur, not
2328 * within markup declarations.
2329 * In that case this is handled in xmlParseMarkupDecl
2330 */
2331 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2332 return;
2333 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2334 return;
2335 break;
2336 case XML_PARSER_IGNORE:
2337 return;
2338 }
2339
2340 NEXT;
2341 name = xmlParseName(ctxt);
2342 if (xmlParserDebugEntities)
2343 xmlGenericError(xmlGenericErrorContext,
2344 "PEReference: %s\n", name);
2345 if (name == NULL) {
2346 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2347 } else {
2348 if (RAW == ';') {
2349 NEXT;
2350 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2351 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2352 if (entity == NULL) {
2353
2354 /*
2355 * [ WFC: Entity Declared ]
2356 * In a document without any DTD, a document with only an
2357 * internal DTD subset which contains no parameter entity
2358 * references, or a document with "standalone='yes'", ...
2359 * ... The declaration of a parameter entity must precede
2360 * any reference to it...
2361 */
2362 if ((ctxt->standalone == 1) ||
2363 ((ctxt->hasExternalSubset == 0) &&
2364 (ctxt->hasPErefs == 0))) {
2365 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2366 "PEReference: %%%s; not found\n", name);
2367 } else {
2368 /*
2369 * [ VC: Entity Declared ]
2370 * In a document with an external subset or external
2371 * parameter entities with "standalone='no'", ...
2372 * ... The declaration of a parameter entity must precede
2373 * any reference to it...
2374 */
2375 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2376 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
2379 } else
2380 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2381 "PEReference: %%%s; not found\n",
2382 name, NULL);
2383 ctxt->valid = 0;
2384 }
2385 } else if (ctxt->input->free != deallocblankswrapper) {
2386 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2387 if (xmlPushInput(ctxt, input) < 0)
2388 return;
2389 } else {
2390 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2391 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2392 xmlChar start[4];
2393 xmlCharEncoding enc;
2394
2395 /*
2396 * handle the extra spaces added before and after
2397 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2398 * this is done independently.
2399 */
2400 input = xmlNewEntityInputStream(ctxt, entity);
2401 if (xmlPushInput(ctxt, input) < 0)
2402 return;
2403
2404 /*
2405 * Get the 4 first bytes and decode the charset
2406 * if enc != XML_CHAR_ENCODING_NONE
2407 * plug some encoding conversion routines.
2408 * Note that, since we may have some non-UTF8
2409 * encoding (like UTF16, bug 135229), the 'length'
2410 * is not known, but we can calculate based upon
2411 * the amount of data in the buffer.
2412 */
2413 GROW
2414 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2415 start[0] = RAW;
2416 start[1] = NXT(1);
2417 start[2] = NXT(2);
2418 start[3] = NXT(3);
2419 enc = xmlDetectCharEncoding(start, 4);
2420 if (enc != XML_CHAR_ENCODING_NONE) {
2421 xmlSwitchEncoding(ctxt, enc);
2422 }
2423 }
2424
2425 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2426 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2427 (IS_BLANK_CH(NXT(5)))) {
2428 xmlParseTextDecl(ctxt);
2429 }
2430 } else {
2431 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2432 "PEReference: %s is not a parameter entity\n",
2433 name);
2434 }
2435 }
2436 } else {
2437 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2438 }
2439 }
2440 }
2441
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable to enlarge
 * @n:  extra room requested on top of the doubling
 *
 * Macro used to grow the current buffer. The companion size variable
 * must be named <buffer>_size; it is doubled, then increased by @n.
 * When the reallocation fails the macro jumps to the mem_error label,
 * which the calling function has to provide.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2454
2455 /**
2456 * xmlStringLenDecodeEntities:
2457 * @ctxt: the parser context
2458 * @str: the input string
2459 * @len: the string length
2460 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2461 * @end: an end marker xmlChar, 0 if none
2462 * @end2: an end marker xmlChar, 0 if none
2463 * @end3: an end marker xmlChar, 0 if none
2464 *
2465 * Takes a entity string content and process to do the adequate substitutions.
2466 *
2467 * [67] Reference ::= EntityRef | CharRef
2468 *
2469 * [69] PEReference ::= '%' Name ';'
2470 *
2471 * Returns A newly allocated string with the substitution done. The caller
2472 * must deallocate it !
2473 */
2474 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2475 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2476 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2477 xmlChar *buffer = NULL;
2478 int buffer_size = 0;
2479
2480 xmlChar *current = NULL;
2481 xmlChar *rep = NULL;
2482 const xmlChar *last;
2483 xmlEntityPtr ent;
2484 int c,l;
2485 int nbchars = 0;
2486
2487 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2488 return(NULL);
2489 last = str + len;
2490
2491 if (((ctxt->depth > 40) &&
2492 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2493 (ctxt->depth > 1024)) {
2494 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2495 return(NULL);
2496 }
2497
2498 /*
2499 * allocate a translation buffer.
2500 */
2501 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2502 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2503 if (buffer == NULL) goto mem_error;
2504
2505 /*
2506 * OK loop until we reach one of the ending char or a size limit.
2507 * we are operating on already parsed values.
2508 */
2509 if (str < last)
2510 c = CUR_SCHAR(str, l);
2511 else
2512 c = 0;
2513 while ((c != 0) && (c != end) && /* non input consuming loop */
2514 (c != end2) && (c != end3)) {
2515
2516 if (c == 0) break;
2517 if ((c == '&') && (str[1] == '#')) {
2518 int val = xmlParseStringCharRef(ctxt, &str);
2519 if (val != 0) {
2520 COPY_BUF(0,buffer,nbchars,val);
2521 }
2522 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2523 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2524 }
2525 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2526 if (xmlParserDebugEntities)
2527 xmlGenericError(xmlGenericErrorContext,
2528 "String decoding Entity Reference: %.30s\n",
2529 str);
2530 ent = xmlParseStringEntityRef(ctxt, &str);
2531 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2532 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2533 goto int_error;
2534 if (ent != NULL)
2535 ctxt->nbentities += ent->checked;
2536 if ((ent != NULL) &&
2537 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2538 if (ent->content != NULL) {
2539 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2540 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2541 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2542 }
2543 } else {
2544 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2545 "predefined entity has no content\n");
2546 }
2547 } else if ((ent != NULL) && (ent->content != NULL)) {
2548 ctxt->depth++;
2549 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2550 0, 0, 0);
2551 ctxt->depth--;
2552
2553 if (rep != NULL) {
2554 current = rep;
2555 while (*current != 0) { /* non input consuming loop */
2556 buffer[nbchars++] = *current++;
2557 if (nbchars >
2558 buffer_size - XML_PARSER_BUFFER_SIZE) {
2559 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2560 goto int_error;
2561 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2562 }
2563 }
2564 xmlFree(rep);
2565 rep = NULL;
2566 }
2567 } else if (ent != NULL) {
2568 int i = xmlStrlen(ent->name);
2569 const xmlChar *cur = ent->name;
2570
2571 buffer[nbchars++] = '&';
2572 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2573 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2574 }
2575 for (;i > 0;i--)
2576 buffer[nbchars++] = *cur++;
2577 buffer[nbchars++] = ';';
2578 }
2579 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2580 if (xmlParserDebugEntities)
2581 xmlGenericError(xmlGenericErrorContext,
2582 "String decoding PE Reference: %.30s\n", str);
2583 ent = xmlParseStringPEReference(ctxt, &str);
2584 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2585 goto int_error;
2586 if (ent != NULL)
2587 ctxt->nbentities += ent->checked;
2588 if (ent != NULL) {
2589 if (ent->content == NULL) {
2590 xmlLoadEntityContent(ctxt, ent);
2591 }
2592 ctxt->depth++;
2593 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2594 0, 0, 0);
2595 ctxt->depth--;
2596 if (rep != NULL) {
2597 current = rep;
2598 while (*current != 0) { /* non input consuming loop */
2599 buffer[nbchars++] = *current++;
2600 if (nbchars >
2601 buffer_size - XML_PARSER_BUFFER_SIZE) {
2602 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2603 goto int_error;
2604 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2605 }
2606 }
2607 xmlFree(rep);
2608 rep = NULL;
2609 }
2610 }
2611 } else {
2612 COPY_BUF(l,buffer,nbchars,c);
2613 str += l;
2614 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2615 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2616 }
2617 }
2618 if (str < last)
2619 c = CUR_SCHAR(str, l);
2620 else
2621 c = 0;
2622 }
2623 buffer[nbchars++] = 0;
2624 return(buffer);
2625
2626 mem_error:
2627 xmlErrMemory(ctxt, NULL);
2628 int_error:
2629 if (rep != NULL)
2630 xmlFree(rep);
2631 if (buffer != NULL)
2632 xmlFree(buffer);
2633 return(NULL);
2634 }
2635
2636 /**
2637 * xmlStringDecodeEntities:
2638 * @ctxt: the parser context
2639 * @str: the input string
2640 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2641 * @end: an end marker xmlChar, 0 if none
2642 * @end2: an end marker xmlChar, 0 if none
2643 * @end3: an end marker xmlChar, 0 if none
2644 *
2645 * Takes a entity string content and process to do the adequate substitutions.
2646 *
2647 * [67] Reference ::= EntityRef | CharRef
2648 *
2649 * [69] PEReference ::= '%' Name ';'
2650 *
2651 * Returns A newly allocated string with the substitution done. The caller
2652 * must deallocate it !
2653 */
2654 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2655 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2656 xmlChar end, xmlChar end2, xmlChar end3) {
2657 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2658 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2659 end, end2, end3));
2660 }
2661
2662 /************************************************************************
2663 * *
2664 * Commodity functions, cleanup needed ? *
2665 * *
2666 ************************************************************************/
2667
2668 /**
2669 * areBlanks:
2670 * @ctxt: an XML parser context
2671 * @str: a xmlChar *
2672 * @len: the size of @str
2673 * @blank_chars: we know the chars are blanks
2674 *
2675 * Is this a sequence of blank chars that one can ignore ?
2676 *
2677 * Returns 1 if ignorable 0 otherwise.
2678 */
2679
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2680 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2681 int blank_chars) {
2682 int i, ret;
2683 xmlNodePtr lastChild;
2684
2685 /*
2686 * Don't spend time trying to differentiate them, the same callback is
2687 * used !
2688 */
2689 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2690 return(0);
2691
2692 /*
2693 * Check for xml:space value.
2694 */
2695 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2696 (*(ctxt->space) == -2))
2697 return(0);
2698
2699 /*
2700 * Check that the string is made of blanks
2701 */
2702 if (blank_chars == 0) {
2703 for (i = 0;i < len;i++)
2704 if (!(IS_BLANK_CH(str[i]))) return(0);
2705 }
2706
2707 /*
2708 * Look if the element is mixed content in the DTD if available
2709 */
2710 if (ctxt->node == NULL) return(0);
2711 if (ctxt->myDoc != NULL) {
2712 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2713 if (ret == 0) return(1);
2714 if (ret == 1) return(0);
2715 }
2716
2717 /*
2718 * Otherwise, heuristic :-\
2719 */
2720 if ((RAW != '<') && (RAW != 0xD)) return(0);
2721 if ((ctxt->node->children == NULL) &&
2722 (RAW == '<') && (NXT(1) == '/')) return(0);
2723
2724 lastChild = xmlGetLastChild(ctxt->node);
2725 if (lastChild == NULL) {
2726 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2727 (ctxt->node->content != NULL)) return(0);
2728 } else if (xmlNodeIsText(lastChild))
2729 return(0);
2730 else if ((ctxt->node->children != NULL) &&
2731 (xmlNodeIsText(ctxt->node->children)))
2732 return(0);
2733 return(1);
2734 }
2735
2736 /************************************************************************
2737 * *
2738 * Extra stuff for namespace support *
2739 * Relates to http://www.w3.org/TR/WD-xml-names *
2740 * *
2741 ************************************************************************/
2742
2743 /**
2744 * xmlSplitQName:
2745 * @ctxt: an XML parser context
2746 * @name: an XML parser context
2747 * @prefix: a xmlChar **
2748 *
2749 * parse an UTF8 encoded XML qualified name string
2750 *
2751 * [NS 5] QName ::= (Prefix ':')? LocalPart
2752 *
2753 * [NS 6] Prefix ::= NCName
2754 *
2755 * [NS 7] LocalPart ::= NCName
2756 *
2757 * Returns the local part, and prefix is updated
2758 * to get the Prefix if any.
2759 */
2760
2761 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2762 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2763 xmlChar buf[XML_MAX_NAMELEN + 5];
2764 xmlChar *buffer = NULL;
2765 int len = 0;
2766 int max = XML_MAX_NAMELEN;
2767 xmlChar *ret = NULL;
2768 const xmlChar *cur = name;
2769 int c;
2770
2771 if (prefix == NULL) return(NULL);
2772 *prefix = NULL;
2773
2774 if (cur == NULL) return(NULL);
2775
2776 #ifndef XML_XML_NAMESPACE
2777 /* xml: prefix is not really a namespace */
2778 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2779 (cur[2] == 'l') && (cur[3] == ':'))
2780 return(xmlStrdup(name));
2781 #endif
2782
2783 /* nasty but well=formed */
2784 if (cur[0] == ':')
2785 return(xmlStrdup(name));
2786
2787 c = *cur++;
2788 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2789 buf[len++] = c;
2790 c = *cur++;
2791 }
2792 if (len >= max) {
2793 /*
2794 * Okay someone managed to make a huge name, so he's ready to pay
2795 * for the processing speed.
2796 */
2797 max = len * 2;
2798
2799 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2800 if (buffer == NULL) {
2801 xmlErrMemory(ctxt, NULL);
2802 return(NULL);
2803 }
2804 memcpy(buffer, buf, len);
2805 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2806 if (len + 10 > max) {
2807 xmlChar *tmp;
2808
2809 max *= 2;
2810 tmp = (xmlChar *) xmlRealloc(buffer,
2811 max * sizeof(xmlChar));
2812 if (tmp == NULL) {
2813 xmlFree(buffer);
2814 xmlErrMemory(ctxt, NULL);
2815 return(NULL);
2816 }
2817 buffer = tmp;
2818 }
2819 buffer[len++] = c;
2820 c = *cur++;
2821 }
2822 buffer[len] = 0;
2823 }
2824
2825 if ((c == ':') && (*cur == 0)) {
2826 if (buffer != NULL)
2827 xmlFree(buffer);
2828 *prefix = NULL;
2829 return(xmlStrdup(name));
2830 }
2831
2832 if (buffer == NULL)
2833 ret = xmlStrndup(buf, len);
2834 else {
2835 ret = buffer;
2836 buffer = NULL;
2837 max = XML_MAX_NAMELEN;
2838 }
2839
2840
2841 if (c == ':') {
2842 c = *cur;
2843 *prefix = ret;
2844 if (c == 0) {
2845 return(xmlStrndup(BAD_CAST "", 0));
2846 }
2847 len = 0;
2848
2849 /*
2850 * Check that the first character is proper to start
2851 * a new name
2852 */
2853 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2854 ((c >= 0x41) && (c <= 0x5A)) ||
2855 (c == '_') || (c == ':'))) {
2856 int l;
2857 int first = CUR_SCHAR(cur, l);
2858
2859 if (!IS_LETTER(first) && (first != '_')) {
2860 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2861 "Name %s is not XML Namespace compliant\n",
2862 name);
2863 }
2864 }
2865 cur++;
2866
2867 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2868 buf[len++] = c;
2869 c = *cur++;
2870 }
2871 if (len >= max) {
2872 /*
2873 * Okay someone managed to make a huge name, so he's ready to pay
2874 * for the processing speed.
2875 */
2876 max = len * 2;
2877
2878 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2879 if (buffer == NULL) {
2880 xmlErrMemory(ctxt, NULL);
2881 return(NULL);
2882 }
2883 memcpy(buffer, buf, len);
2884 while (c != 0) { /* tested bigname2.xml */
2885 if (len + 10 > max) {
2886 xmlChar *tmp;
2887
2888 max *= 2;
2889 tmp = (xmlChar *) xmlRealloc(buffer,
2890 max * sizeof(xmlChar));
2891 if (tmp == NULL) {
2892 xmlErrMemory(ctxt, NULL);
2893 xmlFree(buffer);
2894 return(NULL);
2895 }
2896 buffer = tmp;
2897 }
2898 buffer[len++] = c;
2899 c = *cur++;
2900 }
2901 buffer[len] = 0;
2902 }
2903
2904 if (buffer == NULL)
2905 ret = xmlStrndup(buf, len);
2906 else {
2907 ret = buffer;
2908 }
2909 }
2910
2911 return(ret);
2912 }
2913
2914 /************************************************************************
2915 * *
2916 * The parser itself *
2917 * Relates to http://www.w3.org/TR/REC-xml *
2918 * *
2919 ************************************************************************/
2920
2921 /************************************************************************
2922 * *
2923 * Routines to parse Name, NCName and NmToken *
2924 * *
2925 ************************************************************************/
/*
 * Call counters for the name parsing routines; the routines defined
 * right below increment the Name and NCName ones on entry.
 */
unsigned long nbParseName = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
/* NOTE(review): presumably bumped by the NmToken / string name parsers */
unsigned long nbParseNmToken = 0;
unsigned long nbParseStringName = 0;
2932 /*
2933 * The two following functions are related to the change of accepted
2934 * characters for Name and NmToken in the Revision 5 of XML-1.0
2935 * They correspond to the modified production [4] and the new production [4a]
2936 * changes in that revision. Also note that the macros used for the
2937 * productions Letter, Digit, CombiningChar and Extender are not needed
2938 * anymore.
2939 * We still keep compatibility to pre-revision5 parsing semantic if the
2940 * new XML_PARSE_OLD10 option is given to the parser.
2941 */
2942 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)2943 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2944 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2945 /*
2946 * Use the new checks of production [4] [4a] amd [5] of the
2947 * Update 5 of XML-1.0
2948 */
2949 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2950 (((c >= 'a') && (c <= 'z')) ||
2951 ((c >= 'A') && (c <= 'Z')) ||
2952 (c == '_') || (c == ':') ||
2953 ((c >= 0xC0) && (c <= 0xD6)) ||
2954 ((c >= 0xD8) && (c <= 0xF6)) ||
2955 ((c >= 0xF8) && (c <= 0x2FF)) ||
2956 ((c >= 0x370) && (c <= 0x37D)) ||
2957 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2958 ((c >= 0x200C) && (c <= 0x200D)) ||
2959 ((c >= 0x2070) && (c <= 0x218F)) ||
2960 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2961 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2962 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2963 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2964 ((c >= 0x10000) && (c <= 0xEFFFF))))
2965 return(1);
2966 } else {
2967 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2968 return(1);
2969 }
2970 return(0);
2971 }
2972
2973 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)2974 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2975 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2976 /*
2977 * Use the new checks of production [4] [4a] amd [5] of the
2978 * Update 5 of XML-1.0
2979 */
2980 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2981 (((c >= 'a') && (c <= 'z')) ||
2982 ((c >= 'A') && (c <= 'Z')) ||
2983 ((c >= '0') && (c <= '9')) || /* !start */
2984 (c == '_') || (c == ':') ||
2985 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2986 ((c >= 0xC0) && (c <= 0xD6)) ||
2987 ((c >= 0xD8) && (c <= 0xF6)) ||
2988 ((c >= 0xF8) && (c <= 0x2FF)) ||
2989 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2990 ((c >= 0x370) && (c <= 0x37D)) ||
2991 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2992 ((c >= 0x200C) && (c <= 0x200D)) ||
2993 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2994 ((c >= 0x2070) && (c <= 0x218F)) ||
2995 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2996 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2997 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2998 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2999 ((c >= 0x10000) && (c <= 0xEFFFF))))
3000 return(1);
3001 } else {
3002 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3003 (c == '.') || (c == '-') ||
3004 (c == '_') || (c == ':') ||
3005 (IS_COMBINING(c)) ||
3006 (IS_EXTENDER(c)))
3007 return(1);
3008 }
3009 return(0);
3010 }
3011
3012 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3013 int *len, int *alloc, int normalize);
3014
3015 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3016 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3017 int len = 0, l;
3018 int c;
3019 int count = 0;
3020
3021 nbParseNameComplex++;
3022
3023 /*
3024 * Handler for more complex cases
3025 */
3026 GROW;
3027 c = CUR_CHAR(l);
3028 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3029 /*
3030 * Use the new checks of production [4] [4a] amd [5] of the
3031 * Update 5 of XML-1.0
3032 */
3033 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3034 (!(((c >= 'a') && (c <= 'z')) ||
3035 ((c >= 'A') && (c <= 'Z')) ||
3036 (c == '_') || (c == ':') ||
3037 ((c >= 0xC0) && (c <= 0xD6)) ||
3038 ((c >= 0xD8) && (c <= 0xF6)) ||
3039 ((c >= 0xF8) && (c <= 0x2FF)) ||
3040 ((c >= 0x370) && (c <= 0x37D)) ||
3041 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3042 ((c >= 0x200C) && (c <= 0x200D)) ||
3043 ((c >= 0x2070) && (c <= 0x218F)) ||
3044 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3045 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3046 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3047 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3048 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3049 return(NULL);
3050 }
3051 len += l;
3052 NEXTL(l);
3053 c = CUR_CHAR(l);
3054 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3055 (((c >= 'a') && (c <= 'z')) ||
3056 ((c >= 'A') && (c <= 'Z')) ||
3057 ((c >= '0') && (c <= '9')) || /* !start */
3058 (c == '_') || (c == ':') ||
3059 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3060 ((c >= 0xC0) && (c <= 0xD6)) ||
3061 ((c >= 0xD8) && (c <= 0xF6)) ||
3062 ((c >= 0xF8) && (c <= 0x2FF)) ||
3063 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3064 ((c >= 0x370) && (c <= 0x37D)) ||
3065 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3066 ((c >= 0x200C) && (c <= 0x200D)) ||
3067 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3068 ((c >= 0x2070) && (c <= 0x218F)) ||
3069 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3070 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3071 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3072 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3073 ((c >= 0x10000) && (c <= 0xEFFFF))
3074 )) {
3075 if (count++ > 100) {
3076 count = 0;
3077 GROW;
3078 }
3079 len += l;
3080 NEXTL(l);
3081 c = CUR_CHAR(l);
3082 }
3083 } else {
3084 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3085 (!IS_LETTER(c) && (c != '_') &&
3086 (c != ':'))) {
3087 return(NULL);
3088 }
3089 len += l;
3090 NEXTL(l);
3091 c = CUR_CHAR(l);
3092
3093 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3094 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3095 (c == '.') || (c == '-') ||
3096 (c == '_') || (c == ':') ||
3097 (IS_COMBINING(c)) ||
3098 (IS_EXTENDER(c)))) {
3099 if (count++ > 100) {
3100 count = 0;
3101 GROW;
3102 }
3103 len += l;
3104 NEXTL(l);
3105 c = CUR_CHAR(l);
3106 }
3107 }
3108 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3109 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3111 }
3112
3113 /**
3114 * xmlParseName:
3115 * @ctxt: an XML parser context
3116 *
3117 * parse an XML name.
3118 *
3119 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3120 * CombiningChar | Extender
3121 *
3122 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3123 *
3124 * [6] Names ::= Name (#x20 Name)*
3125 *
3126 * Returns the Name parsed or NULL
3127 */
3128
3129 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3130 xmlParseName(xmlParserCtxtPtr ctxt) {
3131 const xmlChar *in;
3132 const xmlChar *ret;
3133 int count = 0;
3134
3135 GROW;
3136
3137 nbParseName++;
3138
3139 /*
3140 * Accelerator for simple ASCII names
3141 */
3142 in = ctxt->input->cur;
3143 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3144 ((*in >= 0x41) && (*in <= 0x5A)) ||
3145 (*in == '_') || (*in == ':')) {
3146 in++;
3147 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3148 ((*in >= 0x41) && (*in <= 0x5A)) ||
3149 ((*in >= 0x30) && (*in <= 0x39)) ||
3150 (*in == '_') || (*in == '-') ||
3151 (*in == ':') || (*in == '.'))
3152 in++;
3153 if ((*in > 0) && (*in < 0x80)) {
3154 count = in - ctxt->input->cur;
3155 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3156 ctxt->input->cur = in;
3157 ctxt->nbChars += count;
3158 ctxt->input->col += count;
3159 if (ret == NULL)
3160 xmlErrMemory(ctxt, NULL);
3161 return(ret);
3162 }
3163 }
3164 /* accelerator for special cases */
3165 return(xmlParseNameComplex(ctxt));
3166 }
3167
3168 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3169 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3170 int len = 0, l;
3171 int c;
3172 int count = 0;
3173
3174 nbParseNCNameComplex++;
3175
3176 /*
3177 * Handler for more complex cases
3178 */
3179 GROW;
3180 c = CUR_CHAR(l);
3181 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3182 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3183 return(NULL);
3184 }
3185
3186 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3187 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3188 if (count++ > 100) {
3189 count = 0;
3190 GROW;
3191 }
3192 len += l;
3193 NEXTL(l);
3194 c = CUR_CHAR(l);
3195 }
3196 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3197 }
3198
3199 /**
3200 * xmlParseNCName:
3201 * @ctxt: an XML parser context
3202 * @len: lenght of the string parsed
3203 *
3204 * parse an XML name.
3205 *
3206 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3207 * CombiningChar | Extender
3208 *
3209 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3210 *
3211 * Returns the Name parsed or NULL
3212 */
3213
3214 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3215 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3216 const xmlChar *in;
3217 const xmlChar *ret;
3218 int count = 0;
3219
3220 nbParseNCName++;
3221
3222 /*
3223 * Accelerator for simple ASCII names
3224 */
3225 in = ctxt->input->cur;
3226 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3227 ((*in >= 0x41) && (*in <= 0x5A)) ||
3228 (*in == '_')) {
3229 in++;
3230 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3231 ((*in >= 0x41) && (*in <= 0x5A)) ||
3232 ((*in >= 0x30) && (*in <= 0x39)) ||
3233 (*in == '_') || (*in == '-') ||
3234 (*in == '.'))
3235 in++;
3236 if ((*in > 0) && (*in < 0x80)) {
3237 count = in - ctxt->input->cur;
3238 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3239 ctxt->input->cur = in;
3240 ctxt->nbChars += count;
3241 ctxt->input->col += count;
3242 if (ret == NULL) {
3243 xmlErrMemory(ctxt, NULL);
3244 }
3245 return(ret);
3246 }
3247 }
3248 return(xmlParseNCNameComplex(ctxt));
3249 }
3250
3251 /**
3252 * xmlParseNameAndCompare:
3253 * @ctxt: an XML parser context
3254 *
3255 * parse an XML name and compares for match
3256 * (specialized for endtag parsing)
3257 *
3258 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3259 * and the name for mismatch
3260 */
3261
3262 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3263 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3264 register const xmlChar *cmp = other;
3265 register const xmlChar *in;
3266 const xmlChar *ret;
3267
3268 GROW;
3269
3270 in = ctxt->input->cur;
3271 while (*in != 0 && *in == *cmp) {
3272 ++in;
3273 ++cmp;
3274 ctxt->input->col++;
3275 }
3276 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3277 /* success */
3278 ctxt->input->cur = in;
3279 return (const xmlChar*) 1;
3280 }
3281 /* failure (or end of input buffer), check with full function */
3282 ret = xmlParseName (ctxt);
3283 /* strings coming from the dictionnary direct compare possible */
3284 if (ret == other) {
3285 return (const xmlChar*) 1;
3286 }
3287 return ret;
3288 }
3289
3290 /**
3291 * xmlParseStringName:
3292 * @ctxt: an XML parser context
3293 * @str: a pointer to the string pointer (IN/OUT)
3294 *
3295 * parse an XML name.
3296 *
3297 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3298 * CombiningChar | Extender
3299 *
3300 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3301 *
3302 * [6] Names ::= Name (#x20 Name)*
3303 *
3304 * Returns the Name parsed or NULL. The @str pointer
3305 * is updated to the current location in the string.
3306 */
3307
3308 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3309 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3310 xmlChar buf[XML_MAX_NAMELEN + 5];
3311 const xmlChar *cur = *str;
3312 int len = 0, l;
3313 int c;
3314
3315 nbParseStringName++;
3316
3317 c = CUR_SCHAR(cur, l);
3318 if (!xmlIsNameStartChar(ctxt, c)) {
3319 return(NULL);
3320 }
3321
3322 COPY_BUF(l,buf,len,c);
3323 cur += l;
3324 c = CUR_SCHAR(cur, l);
3325 while (xmlIsNameChar(ctxt, c)) {
3326 COPY_BUF(l,buf,len,c);
3327 cur += l;
3328 c = CUR_SCHAR(cur, l);
3329 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3330 /*
3331 * Okay someone managed to make a huge name, so he's ready to pay
3332 * for the processing speed.
3333 */
3334 xmlChar *buffer;
3335 int max = len * 2;
3336
3337 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3338 if (buffer == NULL) {
3339 xmlErrMemory(ctxt, NULL);
3340 return(NULL);
3341 }
3342 memcpy(buffer, buf, len);
3343 while (xmlIsNameChar(ctxt, c)) {
3344 if (len + 10 > max) {
3345 xmlChar *tmp;
3346 max *= 2;
3347 tmp = (xmlChar *) xmlRealloc(buffer,
3348 max * sizeof(xmlChar));
3349 if (tmp == NULL) {
3350 xmlErrMemory(ctxt, NULL);
3351 xmlFree(buffer);
3352 return(NULL);
3353 }
3354 buffer = tmp;
3355 }
3356 COPY_BUF(l,buffer,len,c);
3357 cur += l;
3358 c = CUR_SCHAR(cur, l);
3359 }
3360 buffer[len] = 0;
3361 *str = cur;
3362 return(buffer);
3363 }
3364 }
3365 *str = cur;
3366 return(xmlStrndup(buf, len));
3367 }
3368
3369 /**
3370 * xmlParseNmtoken:
3371 * @ctxt: an XML parser context
3372 *
3373 * parse an XML Nmtoken.
3374 *
3375 * [7] Nmtoken ::= (NameChar)+
3376 *
3377 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378 *
3379 * Returns the Nmtoken parsed or NULL
3380 */
3381
3382 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3383 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3384 xmlChar buf[XML_MAX_NAMELEN + 5];
3385 int len = 0, l;
3386 int c;
3387 int count = 0;
3388
3389 nbParseNmToken++;
3390
3391 GROW;
3392 c = CUR_CHAR(l);
3393
3394 while (xmlIsNameChar(ctxt, c)) {
3395 if (count++ > 100) {
3396 count = 0;
3397 GROW;
3398 }
3399 COPY_BUF(l,buf,len,c);
3400 NEXTL(l);
3401 c = CUR_CHAR(l);
3402 if (len >= XML_MAX_NAMELEN) {
3403 /*
3404 * Okay someone managed to make a huge token, so he's ready to pay
3405 * for the processing speed.
3406 */
3407 xmlChar *buffer;
3408 int max = len * 2;
3409
3410 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3411 if (buffer == NULL) {
3412 xmlErrMemory(ctxt, NULL);
3413 return(NULL);
3414 }
3415 memcpy(buffer, buf, len);
3416 while (xmlIsNameChar(ctxt, c)) {
3417 if (count++ > 100) {
3418 count = 0;
3419 GROW;
3420 }
3421 if (len + 10 > max) {
3422 xmlChar *tmp;
3423
3424 max *= 2;
3425 tmp = (xmlChar *) xmlRealloc(buffer,
3426 max * sizeof(xmlChar));
3427 if (tmp == NULL) {
3428 xmlErrMemory(ctxt, NULL);
3429 xmlFree(buffer);
3430 return(NULL);
3431 }
3432 buffer = tmp;
3433 }
3434 COPY_BUF(l,buffer,len,c);
3435 NEXTL(l);
3436 c = CUR_CHAR(l);
3437 }
3438 buffer[len] = 0;
3439 return(buffer);
3440 }
3441 }
3442 if (len == 0)
3443 return(NULL);
3444 return(xmlStrndup(buf, len));
3445 }
3446
3447 /**
3448 * xmlParseEntityValue:
3449 * @ctxt: an XML parser context
3450 * @orig: if non-NULL store a copy of the original entity value
3451 *
3452 * parse a value for ENTITY declarations
3453 *
3454 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3455 * "'" ([^%&'] | PEReference | Reference)* "'"
3456 *
3457 * Returns the EntityValue parsed with reference substituted or NULL
3458 */
3459
3460 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3461 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3462 xmlChar *buf = NULL;
3463 int len = 0;
3464 int size = XML_PARSER_BUFFER_SIZE;
3465 int c, l;
3466 xmlChar stop;
3467 xmlChar *ret = NULL;
3468 const xmlChar *cur = NULL;
3469 xmlParserInputPtr input;
3470
3471 if (RAW == '"') stop = '"';
3472 else if (RAW == '\'') stop = '\'';
3473 else {
3474 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3475 return(NULL);
3476 }
3477 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3478 if (buf == NULL) {
3479 xmlErrMemory(ctxt, NULL);
3480 return(NULL);
3481 }
3482
3483 /*
3484 * The content of the entity definition is copied in a buffer.
3485 */
3486
3487 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3488 input = ctxt->input;
3489 GROW;
3490 NEXT;
3491 c = CUR_CHAR(l);
3492 /*
3493 * NOTE: 4.4.5 Included in Literal
3494 * When a parameter entity reference appears in a literal entity
3495 * value, ... a single or double quote character in the replacement
3496 * text is always treated as a normal data character and will not
3497 * terminate the literal.
3498 * In practice it means we stop the loop only when back at parsing
3499 * the initial entity and the quote is found
3500 */
3501 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3502 (ctxt->input != input))) {
3503 if (len + 5 >= size) {
3504 xmlChar *tmp;
3505
3506 size *= 2;
3507 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3508 if (tmp == NULL) {
3509 xmlErrMemory(ctxt, NULL);
3510 xmlFree(buf);
3511 return(NULL);
3512 }
3513 buf = tmp;
3514 }
3515 COPY_BUF(l,buf,len,c);
3516 NEXTL(l);
3517 /*
3518 * Pop-up of finished entities.
3519 */
3520 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3521 xmlPopInput(ctxt);
3522
3523 GROW;
3524 c = CUR_CHAR(l);
3525 if (c == 0) {
3526 GROW;
3527 c = CUR_CHAR(l);
3528 }
3529 }
3530 buf[len] = 0;
3531
3532 /*
3533 * Raise problem w.r.t. '&' and '%' being used in non-entities
3534 * reference constructs. Note Charref will be handled in
3535 * xmlStringDecodeEntities()
3536 */
3537 cur = buf;
3538 while (*cur != 0) { /* non input consuming */
3539 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3540 xmlChar *name;
3541 xmlChar tmp = *cur;
3542
3543 cur++;
3544 name = xmlParseStringName(ctxt, &cur);
3545 if ((name == NULL) || (*cur != ';')) {
3546 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3547 "EntityValue: '%c' forbidden except for entities references\n",
3548 tmp);
3549 }
3550 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3551 (ctxt->inputNr == 1)) {
3552 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3553 }
3554 if (name != NULL)
3555 xmlFree(name);
3556 if (*cur == 0)
3557 break;
3558 }
3559 cur++;
3560 }
3561
3562 /*
3563 * Then PEReference entities are substituted.
3564 */
3565 if (c != stop) {
3566 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3567 xmlFree(buf);
3568 } else {
3569 NEXT;
3570 /*
3571 * NOTE: 4.4.7 Bypassed
3572 * When a general entity reference appears in the EntityValue in
3573 * an entity declaration, it is bypassed and left as is.
3574 * so XML_SUBSTITUTE_REF is not set here.
3575 */
3576 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3577 0, 0, 0);
3578 if (orig != NULL)
3579 *orig = buf;
3580 else
3581 xmlFree(buf);
3582 }
3583
3584 return(ret);
3585 }
3586
3587 /**
3588 * xmlParseAttValueComplex:
3589 * @ctxt: an XML parser context
3590 * @len: the resulting attribute len
3591 * @normalize: wether to apply the inner normalization
3592 *
3593 * parse a value for an attribute, this is the fallback function
3594 * of xmlParseAttValue() when the attribute parsing requires handling
3595 * of non-ASCII characters, or normalization compaction.
3596 *
3597 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3598 */
3599 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3600 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3601 xmlChar limit = 0;
3602 xmlChar *buf = NULL;
3603 xmlChar *rep = NULL;
3604 int len = 0;
3605 int buf_size = 0;
3606 int c, l, in_space = 0;
3607 xmlChar *current = NULL;
3608 xmlEntityPtr ent;
3609
3610 if (NXT(0) == '"') {
3611 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3612 limit = '"';
3613 NEXT;
3614 } else if (NXT(0) == '\'') {
3615 limit = '\'';
3616 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3617 NEXT;
3618 } else {
3619 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3620 return(NULL);
3621 }
3622
3623 /*
3624 * allocate a translation buffer.
3625 */
3626 buf_size = XML_PARSER_BUFFER_SIZE;
3627 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3628 if (buf == NULL) goto mem_error;
3629
3630 /*
3631 * OK loop until we reach one of the ending char or a size limit.
3632 */
3633 c = CUR_CHAR(l);
3634 while ((NXT(0) != limit) && /* checked */
3635 (IS_CHAR(c)) && (c != '<')) {
3636 if (c == 0) break;
3637 if (c == '&') {
3638 in_space = 0;
3639 if (NXT(1) == '#') {
3640 int val = xmlParseCharRef(ctxt);
3641
3642 if (val == '&') {
3643 if (ctxt->replaceEntities) {
3644 if (len > buf_size - 10) {
3645 growBuffer(buf, 10);
3646 }
3647 buf[len++] = '&';
3648 } else {
3649 /*
3650 * The reparsing will be done in xmlStringGetNodeList()
3651 * called by the attribute() function in SAX.c
3652 */
3653 if (len > buf_size - 10) {
3654 growBuffer(buf, 10);
3655 }
3656 buf[len++] = '&';
3657 buf[len++] = '#';
3658 buf[len++] = '3';
3659 buf[len++] = '8';
3660 buf[len++] = ';';
3661 }
3662 } else if (val != 0) {
3663 if (len > buf_size - 10) {
3664 growBuffer(buf, 10);
3665 }
3666 len += xmlCopyChar(0, &buf[len], val);
3667 }
3668 } else {
3669 ent = xmlParseEntityRef(ctxt);
3670 ctxt->nbentities++;
3671 if (ent != NULL)
3672 ctxt->nbentities += ent->owner;
3673 if ((ent != NULL) &&
3674 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3675 if (len > buf_size - 10) {
3676 growBuffer(buf, 10);
3677 }
3678 if ((ctxt->replaceEntities == 0) &&
3679 (ent->content[0] == '&')) {
3680 buf[len++] = '&';
3681 buf[len++] = '#';
3682 buf[len++] = '3';
3683 buf[len++] = '8';
3684 buf[len++] = ';';
3685 } else {
3686 buf[len++] = ent->content[0];
3687 }
3688 } else if ((ent != NULL) &&
3689 (ctxt->replaceEntities != 0)) {
3690 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3691 rep = xmlStringDecodeEntities(ctxt, ent->content,
3692 XML_SUBSTITUTE_REF,
3693 0, 0, 0);
3694 if (rep != NULL) {
3695 current = rep;
3696 while (*current != 0) { /* non input consuming */
3697 buf[len++] = *current++;
3698 if (len > buf_size - 10) {
3699 growBuffer(buf, 10);
3700 }
3701 }
3702 xmlFree(rep);
3703 rep = NULL;
3704 }
3705 } else {
3706 if (len > buf_size - 10) {
3707 growBuffer(buf, 10);
3708 }
3709 if (ent->content != NULL)
3710 buf[len++] = ent->content[0];
3711 }
3712 } else if (ent != NULL) {
3713 int i = xmlStrlen(ent->name);
3714 const xmlChar *cur = ent->name;
3715
3716 /*
3717 * This may look absurd but is needed to detect
3718 * entities problems
3719 */
3720 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3721 (ent->content != NULL)) {
3722 rep = xmlStringDecodeEntities(ctxt, ent->content,
3723 XML_SUBSTITUTE_REF, 0, 0, 0);
3724 if (rep != NULL) {
3725 xmlFree(rep);
3726 rep = NULL;
3727 }
3728 }
3729
3730 /*
3731 * Just output the reference
3732 */
3733 buf[len++] = '&';
3734 while (len > buf_size - i - 10) {
3735 growBuffer(buf, i + 10);
3736 }
3737 for (;i > 0;i--)
3738 buf[len++] = *cur++;
3739 buf[len++] = ';';
3740 }
3741 }
3742 } else {
3743 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3744 if ((len != 0) || (!normalize)) {
3745 if ((!normalize) || (!in_space)) {
3746 COPY_BUF(l,buf,len,0x20);
3747 while (len > buf_size - 10) {
3748 growBuffer(buf, 10);
3749 }
3750 }
3751 in_space = 1;
3752 }
3753 } else {
3754 in_space = 0;
3755 COPY_BUF(l,buf,len,c);
3756 if (len > buf_size - 10) {
3757 growBuffer(buf, 10);
3758 }
3759 }
3760 NEXTL(l);
3761 }
3762 GROW;
3763 c = CUR_CHAR(l);
3764 }
3765 if ((in_space) && (normalize)) {
3766 while (buf[len - 1] == 0x20) len--;
3767 }
3768 buf[len] = 0;
3769 if (RAW == '<') {
3770 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3771 } else if (RAW != limit) {
3772 if ((c != 0) && (!IS_CHAR(c))) {
3773 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3774 "invalid character in attribute value\n");
3775 } else {
3776 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3777 "AttValue: ' expected\n");
3778 }
3779 } else
3780 NEXT;
3781 if (attlen != NULL) *attlen = len;
3782 return(buf);
3783
3784 mem_error:
3785 xmlErrMemory(ctxt, NULL);
3786 if (buf != NULL)
3787 xmlFree(buf);
3788 if (rep != NULL)
3789 xmlFree(rep);
3790 return(NULL);
3791 }
3792
3793 /**
3794 * xmlParseAttValue:
3795 * @ctxt: an XML parser context
3796 *
3797 * parse a value for an attribute
3798 * Note: the parser won't do substitution of entities here, this
3799 * will be handled later in xmlStringGetNodeList
3800 *
3801 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3802 * "'" ([^<&'] | Reference)* "'"
3803 *
3804 * 3.3.3 Attribute-Value Normalization:
3805 * Before the value of an attribute is passed to the application or
3806 * checked for validity, the XML processor must normalize it as follows:
3807 * - a character reference is processed by appending the referenced
3808 * character to the attribute value
3809 * - an entity reference is processed by recursively processing the
3810 * replacement text of the entity
3811 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3812 * appending #x20 to the normalized value, except that only a single
3813 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3814 * parsed entity or the literal entity value of an internal parsed entity
3815 * - other characters are processed by appending them to the normalized value
3816 * If the declared value is not CDATA, then the XML processor must further
3817 * process the normalized attribute value by discarding any leading and
3818 * trailing space (#x20) characters, and by replacing sequences of space
3819 * (#x20) characters by a single space (#x20) character.
3820 * All attributes for which no declaration has been read should be treated
3821 * by a non-validating parser as if declared CDATA.
3822 *
3823 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3824 */
3825
3826
3827 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3828 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3829 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3830 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3831 }
3832
3833 /**
3834 * xmlParseSystemLiteral:
3835 * @ctxt: an XML parser context
3836 *
3837 * parse an XML Literal
3838 *
3839 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3840 *
3841 * Returns the SystemLiteral parsed or NULL
3842 */
3843
3844 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)3845 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3846 xmlChar *buf = NULL;
3847 int len = 0;
3848 int size = XML_PARSER_BUFFER_SIZE;
3849 int cur, l;
3850 xmlChar stop;
3851 int state = ctxt->instate;
3852 int count = 0;
3853
3854 SHRINK;
3855 if (RAW == '"') {
3856 NEXT;
3857 stop = '"';
3858 } else if (RAW == '\'') {
3859 NEXT;
3860 stop = '\'';
3861 } else {
3862 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3863 return(NULL);
3864 }
3865
3866 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3867 if (buf == NULL) {
3868 xmlErrMemory(ctxt, NULL);
3869 return(NULL);
3870 }
3871 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3872 cur = CUR_CHAR(l);
3873 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3874 if (len + 5 >= size) {
3875 xmlChar *tmp;
3876
3877 size *= 2;
3878 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3879 if (tmp == NULL) {
3880 xmlFree(buf);
3881 xmlErrMemory(ctxt, NULL);
3882 ctxt->instate = (xmlParserInputState) state;
3883 return(NULL);
3884 }
3885 buf = tmp;
3886 }
3887 count++;
3888 if (count > 50) {
3889 GROW;
3890 count = 0;
3891 }
3892 COPY_BUF(l,buf,len,cur);
3893 NEXTL(l);
3894 cur = CUR_CHAR(l);
3895 if (cur == 0) {
3896 GROW;
3897 SHRINK;
3898 cur = CUR_CHAR(l);
3899 }
3900 }
3901 buf[len] = 0;
3902 ctxt->instate = (xmlParserInputState) state;
3903 if (!IS_CHAR(cur)) {
3904 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3905 } else {
3906 NEXT;
3907 }
3908 return(buf);
3909 }
3910
3911 /**
3912 * xmlParsePubidLiteral:
3913 * @ctxt: an XML parser context
3914 *
3915 * parse an XML public literal
3916 *
3917 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3918 *
3919 * Returns the PubidLiteral parsed or NULL.
3920 */
3921
3922 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)3923 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3924 xmlChar *buf = NULL;
3925 int len = 0;
3926 int size = XML_PARSER_BUFFER_SIZE;
3927 xmlChar cur;
3928 xmlChar stop;
3929 int count = 0;
3930 xmlParserInputState oldstate = ctxt->instate;
3931
3932 SHRINK;
3933 if (RAW == '"') {
3934 NEXT;
3935 stop = '"';
3936 } else if (RAW == '\'') {
3937 NEXT;
3938 stop = '\'';
3939 } else {
3940 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3941 return(NULL);
3942 }
3943 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3944 if (buf == NULL) {
3945 xmlErrMemory(ctxt, NULL);
3946 return(NULL);
3947 }
3948 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3949 cur = CUR;
3950 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3951 if (len + 1 >= size) {
3952 xmlChar *tmp;
3953
3954 size *= 2;
3955 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3956 if (tmp == NULL) {
3957 xmlErrMemory(ctxt, NULL);
3958 xmlFree(buf);
3959 return(NULL);
3960 }
3961 buf = tmp;
3962 }
3963 buf[len++] = cur;
3964 count++;
3965 if (count > 50) {
3966 GROW;
3967 count = 0;
3968 }
3969 NEXT;
3970 cur = CUR;
3971 if (cur == 0) {
3972 GROW;
3973 SHRINK;
3974 cur = CUR;
3975 }
3976 }
3977 buf[len] = 0;
3978 if (cur != stop) {
3979 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3980 } else {
3981 NEXT;
3982 }
3983 ctxt->instate = oldstate;
3984 return(buf);
3985 }
3986
/* forward declaration: called by xmlParseCharData() before its definition */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by a raw input byte. A non-zero entry (the byte
 * value itself) lets the scanning loop of xmlParseCharData() step over
 * the byte; a zero entry stops it. Zero entries are: the control bytes
 * below 0x20 except 0x09 (so 0x0A and 0x0D stop the scan), '&' (0x26),
 * '<' (0x3C), ']' (0x5D) and every byte from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 kept, LF/CR handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4026
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero, runs of plain ASCII text are handed to the SAX
 * callbacks straight from the input buffer. Anything that path cannot
 * handle, and every call with @cdata set, goes to
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position matching ctxt->input->cur; refreshed after each delivery
     * and written back before handing over to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* only spaces and line feeds before the next markup */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* consume the run before invoking the callbacks */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /* distinct handlers: let areBlanks() pick one */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): -1 and -2 look like sentinel
                             * values of the ctxt->space entry; their
                             * meaning is not visible here.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* step over every byte test_char_data[] marks as plain text */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* a lone ']' is ordinary text */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the text gathered between input->cur and in */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* run starts with a blank: it may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: input->cur is moved onto the LF, so the
                     * CR is dropped and the LF becomes the first byte of
                     * the next run. "continue" jumps to the loop
                     * condition, evaluated on the byte after the LF.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back and leave it to the code below */
                in--;
            }
            /* markup or a reference follows: the caller takes over */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* re-read the cursor after the buffer maintenance above */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* put back the position matching input->cur, then use the slow path */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4182
4183 /**
4184 * xmlParseCharDataComplex:
4185 * @ctxt: an XML parser context
4186 * @cdata: int indicating whether we are within a CDATA section
4187 *
4188 * parse a CharData section.this is the fallback function
4189 * of xmlParseCharData() when the parsing requires handling
4190 * of non-ASCII characters.
4191 */
4192 void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4193 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4194 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4195 int nbchar = 0;
4196 int cur, l;
4197 int count = 0;
4198
4199 SHRINK;
4200 GROW;
4201 cur = CUR_CHAR(l);
4202 while ((cur != '<') && /* checked */
4203 (cur != '&') &&
4204 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4205 if ((cur == ']') && (NXT(1) == ']') &&
4206 (NXT(2) == '>')) {
4207 if (cdata) break;
4208 else {
4209 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4210 }
4211 }
4212 COPY_BUF(l,buf,nbchar,cur);
4213 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4214 buf[nbchar] = 0;
4215
4216 /*
4217 * OK the segment is to be consumed as chars.
4218 */
4219 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4220 if (areBlanks(ctxt, buf, nbchar, 0)) {
4221 if (ctxt->sax->ignorableWhitespace != NULL)
4222 ctxt->sax->ignorableWhitespace(ctxt->userData,
4223 buf, nbchar);
4224 } else {
4225 if (ctxt->sax->characters != NULL)
4226 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4227 if ((ctxt->sax->characters !=
4228 ctxt->sax->ignorableWhitespace) &&
4229 (*ctxt->space == -1))
4230 *ctxt->space = -2;
4231 }
4232 }
4233 nbchar = 0;
4234 /* something really bad happened in the SAX callback */
4235 if (ctxt->instate != XML_PARSER_CONTENT)
4236 return;
4237 }
4238 count++;
4239 if (count > 50) {
4240 GROW;
4241 count = 0;
4242 }
4243 NEXTL(l);
4244 cur = CUR_CHAR(l);
4245 }
4246 if (nbchar != 0) {
4247 buf[nbchar] = 0;
4248 /*
4249 * OK the segment is to be consumed as chars.
4250 */
4251 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4252 if (areBlanks(ctxt, buf, nbchar, 0)) {
4253 if (ctxt->sax->ignorableWhitespace != NULL)
4254 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4255 } else {
4256 if (ctxt->sax->characters != NULL)
4257 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4258 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4259 (*ctxt->space == -1))
4260 *ctxt->space = -2;
4261 }
4262 }
4263 }
4264 if ((cur != 0) && (!IS_CHAR(cur))) {
4265 /* Generate the error and skip the offending character */
4266 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4267 "PCDATA invalid Char value %d\n",
4268 cur);
4269 NEXTL(l);
4270 }
4271 }
4272
4273 /**
4274 * xmlParseExternalID:
4275 * @ctxt: an XML parser context
4276 * @publicID: a xmlChar** receiving PubidLiteral
4277 * @strict: indicate whether we should restrict parsing to only
4278 * production [75], see NOTE below
4279 *
4280 * Parse an External ID or a Public ID
4281 *
4282 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4283 * 'PUBLIC' S PubidLiteral S SystemLiteral
4284 *
4285 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4286 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4287 *
4288 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4289 *
4290 * Returns the function returns SystemLiteral and in the second
4291 * case publicID receives PubidLiteral, is strict is off
4292 * it is possible to return NULL and have publicID set.
4293 */
4294
4295 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4296 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4297 xmlChar *URI = NULL;
4298
4299 SHRINK;
4300
4301 *publicID = NULL;
4302 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4303 SKIP(6);
4304 if (!IS_BLANK_CH(CUR)) {
4305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4306 "Space required after 'SYSTEM'\n");
4307 }
4308 SKIP_BLANKS;
4309 URI = xmlParseSystemLiteral(ctxt);
4310 if (URI == NULL) {
4311 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4312 }
4313 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4314 SKIP(6);
4315 if (!IS_BLANK_CH(CUR)) {
4316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 "Space required after 'PUBLIC'\n");
4318 }
4319 SKIP_BLANKS;
4320 *publicID = xmlParsePubidLiteral(ctxt);
4321 if (*publicID == NULL) {
4322 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4323 }
4324 if (strict) {
4325 /*
4326 * We don't handle [83] so "S SystemLiteral" is required.
4327 */
4328 if (!IS_BLANK_CH(CUR)) {
4329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4330 "Space required after the Public Identifier\n");
4331 }
4332 } else {
4333 /*
4334 * We handle [83] so we return immediately, if
4335 * "S SystemLiteral" is not detected. From a purely parsing
4336 * point of view that's a nice mess.
4337 */
4338 const xmlChar *ptr;
4339 GROW;
4340
4341 ptr = CUR_PTR;
4342 if (!IS_BLANK_CH(*ptr)) return(NULL);
4343
4344 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4345 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4346 }
4347 SKIP_BLANKS;
4348 URI = xmlParseSystemLiteral(ctxt);
4349 if (URI == NULL) {
4350 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4351 }
4352 }
4353 return(URI);
4354 }
4355
4356 /**
4357 * xmlParseCommentComplex:
4358 * @ctxt: an XML parser context
4359 * @buf: the already parsed part of the buffer
4360 * @len: number of bytes filles in the buffer
4361 * @size: allocated size of the buffer
4362 *
4363 * Skip an XML (SGML) comment <!-- .... -->
4364 * The spec says that "For compatibility, the string "--" (double-hyphen)
4365 * must not occur within comments. "
4366 * This is the slow routine in case the accelerator for ascii didn't work
4367 *
4368 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4369 */
4370 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4371 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4372 int q, ql;
4373 int r, rl;
4374 int cur, l;
4375 int count = 0;
4376 int inputid;
4377
4378 inputid = ctxt->input->id;
4379
4380 if (buf == NULL) {
4381 len = 0;
4382 size = XML_PARSER_BUFFER_SIZE;
4383 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4384 if (buf == NULL) {
4385 xmlErrMemory(ctxt, NULL);
4386 return;
4387 }
4388 }
4389 GROW; /* Assure there's enough input data */
4390 q = CUR_CHAR(ql);
4391 if (q == 0)
4392 goto not_terminated;
4393 if (!IS_CHAR(q)) {
4394 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395 "xmlParseComment: invalid xmlChar value %d\n",
4396 q);
4397 xmlFree (buf);
4398 return;
4399 }
4400 NEXTL(ql);
4401 r = CUR_CHAR(rl);
4402 if (r == 0)
4403 goto not_terminated;
4404 if (!IS_CHAR(r)) {
4405 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406 "xmlParseComment: invalid xmlChar value %d\n",
4407 q);
4408 xmlFree (buf);
4409 return;
4410 }
4411 NEXTL(rl);
4412 cur = CUR_CHAR(l);
4413 if (cur == 0)
4414 goto not_terminated;
4415 while (IS_CHAR(cur) && /* checked */
4416 ((cur != '>') ||
4417 (r != '-') || (q != '-'))) {
4418 if ((r == '-') && (q == '-')) {
4419 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4420 }
4421 if (len + 5 >= size) {
4422 xmlChar *new_buf;
4423 size *= 2;
4424 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4425 if (new_buf == NULL) {
4426 xmlFree (buf);
4427 xmlErrMemory(ctxt, NULL);
4428 return;
4429 }
4430 buf = new_buf;
4431 }
4432 COPY_BUF(ql,buf,len,q);
4433 q = r;
4434 ql = rl;
4435 r = cur;
4436 rl = l;
4437
4438 count++;
4439 if (count > 50) {
4440 GROW;
4441 count = 0;
4442 }
4443 NEXTL(l);
4444 cur = CUR_CHAR(l);
4445 if (cur == 0) {
4446 SHRINK;
4447 GROW;
4448 cur = CUR_CHAR(l);
4449 }
4450 }
4451 buf[len] = 0;
4452 if (cur == 0) {
4453 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4454 "Comment not terminated \n<!--%.50s\n", buf);
4455 } else if (!IS_CHAR(cur)) {
4456 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4457 "xmlParseComment: invalid xmlChar value %d\n",
4458 cur);
4459 } else {
4460 if (inputid != ctxt->input->id) {
4461 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4462 "Comment doesn't start and stop in the same entity\n");
4463 }
4464 NEXT;
4465 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4466 (!ctxt->disableSAX))
4467 ctxt->sax->comment(ctxt->userData, buf);
4468 }
4469 xmlFree(buf);
4470 return;
4471 not_terminated:
4472 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4473 "Comment not terminated\n", NULL);
4474 xmlFree(buf);
4475 return;
4476 }
4477
4478 /**
4479 * xmlParseComment:
4480 * @ctxt: an XML parser context
4481 *
4482 * Skip an XML (SGML) comment <!-- .... -->
4483 * The spec says that "For compatibility, the string "--" (double-hyphen)
4484 * must not occur within comments. "
4485 *
4486 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4487 */
4488 void
xmlParseComment(xmlParserCtxtPtr ctxt)4489 xmlParseComment(xmlParserCtxtPtr ctxt) {
4490 xmlChar *buf = NULL;
4491 int size = XML_PARSER_BUFFER_SIZE;
4492 int len = 0;
4493 xmlParserInputState state;
4494 const xmlChar *in;
4495 int nbchar = 0, ccol;
4496 int inputid;
4497
4498 /*
4499 * Check that there is a comment right here.
4500 */
4501 if ((RAW != '<') || (NXT(1) != '!') ||
4502 (NXT(2) != '-') || (NXT(3) != '-')) return;
4503 state = ctxt->instate;
4504 ctxt->instate = XML_PARSER_COMMENT;
4505 inputid = ctxt->input->id;
4506 SKIP(4);
4507 SHRINK;
4508 GROW;
4509
4510 /*
4511 * Accelerated common case where input don't need to be
4512 * modified before passing it to the handler.
4513 */
4514 in = ctxt->input->cur;
4515 do {
4516 if (*in == 0xA) {
4517 do {
4518 ctxt->input->line++; ctxt->input->col = 1;
4519 in++;
4520 } while (*in == 0xA);
4521 }
4522 get_more:
4523 ccol = ctxt->input->col;
4524 while (((*in > '-') && (*in <= 0x7F)) ||
4525 ((*in >= 0x20) && (*in < '-')) ||
4526 (*in == 0x09)) {
4527 in++;
4528 ccol++;
4529 }
4530 ctxt->input->col = ccol;
4531 if (*in == 0xA) {
4532 do {
4533 ctxt->input->line++; ctxt->input->col = 1;
4534 in++;
4535 } while (*in == 0xA);
4536 goto get_more;
4537 }
4538 nbchar = in - ctxt->input->cur;
4539 /*
4540 * save current set of data
4541 */
4542 if (nbchar > 0) {
4543 if ((ctxt->sax != NULL) &&
4544 (ctxt->sax->comment != NULL)) {
4545 if (buf == NULL) {
4546 if ((*in == '-') && (in[1] == '-'))
4547 size = nbchar + 1;
4548 else
4549 size = XML_PARSER_BUFFER_SIZE + nbchar;
4550 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4551 if (buf == NULL) {
4552 xmlErrMemory(ctxt, NULL);
4553 ctxt->instate = state;
4554 return;
4555 }
4556 len = 0;
4557 } else if (len + nbchar + 1 >= size) {
4558 xmlChar *new_buf;
4559 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4560 new_buf = (xmlChar *) xmlRealloc(buf,
4561 size * sizeof(xmlChar));
4562 if (new_buf == NULL) {
4563 xmlFree (buf);
4564 xmlErrMemory(ctxt, NULL);
4565 ctxt->instate = state;
4566 return;
4567 }
4568 buf = new_buf;
4569 }
4570 memcpy(&buf[len], ctxt->input->cur, nbchar);
4571 len += nbchar;
4572 buf[len] = 0;
4573 }
4574 }
4575 ctxt->input->cur = in;
4576 if (*in == 0xA) {
4577 in++;
4578 ctxt->input->line++; ctxt->input->col = 1;
4579 }
4580 if (*in == 0xD) {
4581 in++;
4582 if (*in == 0xA) {
4583 ctxt->input->cur = in;
4584 in++;
4585 ctxt->input->line++; ctxt->input->col = 1;
4586 continue; /* while */
4587 }
4588 in--;
4589 }
4590 SHRINK;
4591 GROW;
4592 in = ctxt->input->cur;
4593 if (*in == '-') {
4594 if (in[1] == '-') {
4595 if (in[2] == '>') {
4596 if (ctxt->input->id != inputid) {
4597 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4598 "comment doesn't start and stop in the same entity\n");
4599 }
4600 SKIP(3);
4601 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4602 (!ctxt->disableSAX)) {
4603 if (buf != NULL)
4604 ctxt->sax->comment(ctxt->userData, buf);
4605 else
4606 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4607 }
4608 if (buf != NULL)
4609 xmlFree(buf);
4610 ctxt->instate = state;
4611 return;
4612 }
4613 if (buf != NULL)
4614 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4615 "Comment not terminated \n<!--%.50s\n",
4616 buf);
4617 else
4618 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4619 "Comment not terminated \n", NULL);
4620 in++;
4621 ctxt->input->col++;
4622 }
4623 in++;
4624 ctxt->input->col++;
4625 goto get_more;
4626 }
4627 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4628 xmlParseCommentComplex(ctxt, buf, len, size);
4629 ctxt->instate = state;
4630 return;
4631 }
4632
4633
4634 /**
4635 * xmlParsePITarget:
4636 * @ctxt: an XML parser context
4637 *
4638 * parse the name of a PI
4639 *
4640 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4641 *
4642 * Returns the PITarget name or NULL
4643 */
4644
4645 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4646 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4647 const xmlChar *name;
4648
4649 name = xmlParseName(ctxt);
4650 if ((name != NULL) &&
4651 ((name[0] == 'x') || (name[0] == 'X')) &&
4652 ((name[1] == 'm') || (name[1] == 'M')) &&
4653 ((name[2] == 'l') || (name[2] == 'L'))) {
4654 int i;
4655 if ((name[0] == 'x') && (name[1] == 'm') &&
4656 (name[2] == 'l') && (name[3] == 0)) {
4657 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4658 "XML declaration allowed only at the start of the document\n");
4659 return(name);
4660 } else if (name[3] == 0) {
4661 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4662 return(name);
4663 }
4664 for (i = 0;;i++) {
4665 if (xmlW3CPIs[i] == NULL) break;
4666 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4667 return(name);
4668 }
4669 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4670 "xmlParsePITarget: invalid name prefix 'xml'\n",
4671 NULL, NULL);
4672 }
4673 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4674 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4675 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4676 }
4677 return(name);
4678 }
4679
4680 #ifdef LIBXML_CATALOG_ENABLED
4681 /**
4682 * xmlParseCatalogPI:
4683 * @ctxt: an XML parser context
4684 * @catalog: the PI value string
4685 *
4686 * parse an XML Catalog Processing Instruction.
4687 *
4688 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4689 *
4690 * Occurs only if allowed by the user and if happening in the Misc
4691 * part of the document before any doctype informations
4692 * This will add the given catalog to the parsing context in order
4693 * to be used if there is a resolution need further down in the document
4694 */
4695
4696 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)4697 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4698 xmlChar *URL = NULL;
4699 const xmlChar *tmp, *base;
4700 xmlChar marker;
4701
4702 tmp = catalog;
4703 while (IS_BLANK_CH(*tmp)) tmp++;
4704 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4705 goto error;
4706 tmp += 7;
4707 while (IS_BLANK_CH(*tmp)) tmp++;
4708 if (*tmp != '=') {
4709 return;
4710 }
4711 tmp++;
4712 while (IS_BLANK_CH(*tmp)) tmp++;
4713 marker = *tmp;
4714 if ((marker != '\'') && (marker != '"'))
4715 goto error;
4716 tmp++;
4717 base = tmp;
4718 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4719 if (*tmp == 0)
4720 goto error;
4721 URL = xmlStrndup(base, tmp - base);
4722 tmp++;
4723 while (IS_BLANK_CH(*tmp)) tmp++;
4724 if (*tmp != 0)
4725 goto error;
4726
4727 if (URL != NULL) {
4728 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4729 xmlFree(URL);
4730 }
4731 return;
4732
4733 error:
4734 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4735 "Catalog PI syntax error: %s\n",
4736 catalog, NULL);
4737 if (URL != NULL)
4738 xmlFree(URL);
4739 }
4740 #endif
4741
4742 /**
4743 * xmlParsePI:
4744 * @ctxt: an XML parser context
4745 *
4746 * parse an XML Processing Instruction.
4747 *
4748 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4749 *
4750 * The processing is transfered to SAX once parsed.
4751 */
4752
4753 void
xmlParsePI(xmlParserCtxtPtr ctxt)4754 xmlParsePI(xmlParserCtxtPtr ctxt) {
4755 xmlChar *buf = NULL;
4756 int len = 0;
4757 int size = XML_PARSER_BUFFER_SIZE;
4758 int cur, l;
4759 const xmlChar *target;
4760 xmlParserInputState state;
4761 int count = 0;
4762
4763 if ((RAW == '<') && (NXT(1) == '?')) {
4764 xmlParserInputPtr input = ctxt->input;
4765 state = ctxt->instate;
4766 ctxt->instate = XML_PARSER_PI;
4767 /*
4768 * this is a Processing Instruction.
4769 */
4770 SKIP(2);
4771 SHRINK;
4772
4773 /*
4774 * Parse the target name and check for special support like
4775 * namespace.
4776 */
4777 target = xmlParsePITarget(ctxt);
4778 if (target != NULL) {
4779 if ((RAW == '?') && (NXT(1) == '>')) {
4780 if (input != ctxt->input) {
4781 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4782 "PI declaration doesn't start and stop in the same entity\n");
4783 }
4784 SKIP(2);
4785
4786 /*
4787 * SAX: PI detected.
4788 */
4789 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4790 (ctxt->sax->processingInstruction != NULL))
4791 ctxt->sax->processingInstruction(ctxt->userData,
4792 target, NULL);
4793 ctxt->instate = state;
4794 return;
4795 }
4796 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4797 if (buf == NULL) {
4798 xmlErrMemory(ctxt, NULL);
4799 ctxt->instate = state;
4800 return;
4801 }
4802 cur = CUR;
4803 if (!IS_BLANK(cur)) {
4804 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4805 "ParsePI: PI %s space expected\n", target);
4806 }
4807 SKIP_BLANKS;
4808 cur = CUR_CHAR(l);
4809 while (IS_CHAR(cur) && /* checked */
4810 ((cur != '?') || (NXT(1) != '>'))) {
4811 if (len + 5 >= size) {
4812 xmlChar *tmp;
4813
4814 size *= 2;
4815 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4816 if (tmp == NULL) {
4817 xmlErrMemory(ctxt, NULL);
4818 xmlFree(buf);
4819 ctxt->instate = state;
4820 return;
4821 }
4822 buf = tmp;
4823 }
4824 count++;
4825 if (count > 50) {
4826 GROW;
4827 count = 0;
4828 }
4829 COPY_BUF(l,buf,len,cur);
4830 NEXTL(l);
4831 cur = CUR_CHAR(l);
4832 if (cur == 0) {
4833 SHRINK;
4834 GROW;
4835 cur = CUR_CHAR(l);
4836 }
4837 }
4838 buf[len] = 0;
4839 if (cur != '?') {
4840 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4841 "ParsePI: PI %s never end ...\n", target);
4842 } else {
4843 if (input != ctxt->input) {
4844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4845 "PI declaration doesn't start and stop in the same entity\n");
4846 }
4847 SKIP(2);
4848
4849 #ifdef LIBXML_CATALOG_ENABLED
4850 if (((state == XML_PARSER_MISC) ||
4851 (state == XML_PARSER_START)) &&
4852 (xmlStrEqual(target, XML_CATALOG_PI))) {
4853 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4854 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4855 (allow == XML_CATA_ALLOW_ALL))
4856 xmlParseCatalogPI(ctxt, buf);
4857 }
4858 #endif
4859
4860
4861 /*
4862 * SAX: PI detected.
4863 */
4864 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4865 (ctxt->sax->processingInstruction != NULL))
4866 ctxt->sax->processingInstruction(ctxt->userData,
4867 target, buf);
4868 }
4869 xmlFree(buf);
4870 } else {
4871 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4872 }
4873 ctxt->instate = state;
4874 }
4875 }
4876
4877 /**
4878 * xmlParseNotationDecl:
4879 * @ctxt: an XML parser context
4880 *
4881 * parse a notation declaration
4882 *
4883 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4884 *
4885 * Hence there is actually 3 choices:
4886 * 'PUBLIC' S PubidLiteral
4887 * 'PUBLIC' S PubidLiteral S SystemLiteral
4888 * and 'SYSTEM' S SystemLiteral
4889 *
4890 * See the NOTE on xmlParseExternalID().
4891 */
4892
4893 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)4894 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4895 const xmlChar *name;
4896 xmlChar *Pubid;
4897 xmlChar *Systemid;
4898
4899 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4900 xmlParserInputPtr input = ctxt->input;
4901 SHRINK;
4902 SKIP(10);
4903 if (!IS_BLANK_CH(CUR)) {
4904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4905 "Space required after '<!NOTATION'\n");
4906 return;
4907 }
4908 SKIP_BLANKS;
4909
4910 name = xmlParseName(ctxt);
4911 if (name == NULL) {
4912 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4913 return;
4914 }
4915 if (!IS_BLANK_CH(CUR)) {
4916 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4917 "Space required after the NOTATION name'\n");
4918 return;
4919 }
4920 if (xmlStrchr(name, ':') != NULL) {
4921 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4922 "colon are forbidden from notation names '%s'\n",
4923 name, NULL, NULL);
4924 }
4925 SKIP_BLANKS;
4926
4927 /*
4928 * Parse the IDs.
4929 */
4930 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4931 SKIP_BLANKS;
4932
4933 if (RAW == '>') {
4934 if (input != ctxt->input) {
4935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4936 "Notation declaration doesn't start and stop in the same entity\n");
4937 }
4938 NEXT;
4939 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4940 (ctxt->sax->notationDecl != NULL))
4941 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4942 } else {
4943 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4944 }
4945 if (Systemid != NULL) xmlFree(Systemid);
4946 if (Pubid != NULL) xmlFree(Pubid);
4947 }
4948 }
4949
4950 /**
4951 * xmlParseEntityDecl:
4952 * @ctxt: an XML parser context
4953 *
4954 * parse <!ENTITY declarations
4955 *
4956 * [70] EntityDecl ::= GEDecl | PEDecl
4957 *
4958 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4959 *
4960 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4961 *
4962 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4963 *
4964 * [74] PEDef ::= EntityValue | ExternalID
4965 *
4966 * [76] NDataDecl ::= S 'NDATA' S Name
4967 *
4968 * [ VC: Notation Declared ]
4969 * The Name must match the declared name of a notation.
4970 */
4971
4972 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)4973 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4974 const xmlChar *name = NULL;
4975 xmlChar *value = NULL;
4976 xmlChar *URI = NULL, *literal = NULL;
4977 const xmlChar *ndata = NULL;
4978 int isParameter = 0;
4979 xmlChar *orig = NULL;
4980 int skipped;
4981
4982 /* GROW; done in the caller */
4983 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4984 xmlParserInputPtr input = ctxt->input;
4985 SHRINK;
4986 SKIP(8);
4987 skipped = SKIP_BLANKS;
4988 if (skipped == 0) {
4989 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4990 "Space required after '<!ENTITY'\n");
4991 }
4992
4993 if (RAW == '%') {
4994 NEXT;
4995 skipped = SKIP_BLANKS;
4996 if (skipped == 0) {
4997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4998 "Space required after '%'\n");
4999 }
5000 isParameter = 1;
5001 }
5002
5003 name = xmlParseName(ctxt);
5004 if (name == NULL) {
5005 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5006 "xmlParseEntityDecl: no name\n");
5007 return;
5008 }
5009 if (xmlStrchr(name, ':') != NULL) {
5010 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5011 "colon are forbidden from entities names '%s'\n",
5012 name, NULL, NULL);
5013 }
5014 skipped = SKIP_BLANKS;
5015 if (skipped == 0) {
5016 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5017 "Space required after the entity name\n");
5018 }
5019
5020 ctxt->instate = XML_PARSER_ENTITY_DECL;
5021 /*
5022 * handle the various case of definitions...
5023 */
5024 if (isParameter) {
5025 if ((RAW == '"') || (RAW == '\'')) {
5026 value = xmlParseEntityValue(ctxt, &orig);
5027 if (value) {
5028 if ((ctxt->sax != NULL) &&
5029 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5030 ctxt->sax->entityDecl(ctxt->userData, name,
5031 XML_INTERNAL_PARAMETER_ENTITY,
5032 NULL, NULL, value);
5033 }
5034 } else {
5035 URI = xmlParseExternalID(ctxt, &literal, 1);
5036 if ((URI == NULL) && (literal == NULL)) {
5037 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5038 }
5039 if (URI) {
5040 xmlURIPtr uri;
5041
5042 uri = xmlParseURI((const char *) URI);
5043 if (uri == NULL) {
5044 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5045 "Invalid URI: %s\n", URI);
5046 /*
5047 * This really ought to be a well formedness error
5048 * but the XML Core WG decided otherwise c.f. issue
5049 * E26 of the XML erratas.
5050 */
5051 } else {
5052 if (uri->fragment != NULL) {
5053 /*
5054 * Okay this is foolish to block those but not
5055 * invalid URIs.
5056 */
5057 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5058 } else {
5059 if ((ctxt->sax != NULL) &&
5060 (!ctxt->disableSAX) &&
5061 (ctxt->sax->entityDecl != NULL))
5062 ctxt->sax->entityDecl(ctxt->userData, name,
5063 XML_EXTERNAL_PARAMETER_ENTITY,
5064 literal, URI, NULL);
5065 }
5066 xmlFreeURI(uri);
5067 }
5068 }
5069 }
5070 } else {
5071 if ((RAW == '"') || (RAW == '\'')) {
5072 value = xmlParseEntityValue(ctxt, &orig);
5073 if ((ctxt->sax != NULL) &&
5074 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5075 ctxt->sax->entityDecl(ctxt->userData, name,
5076 XML_INTERNAL_GENERAL_ENTITY,
5077 NULL, NULL, value);
5078 /*
5079 * For expat compatibility in SAX mode.
5080 */
5081 if ((ctxt->myDoc == NULL) ||
5082 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5083 if (ctxt->myDoc == NULL) {
5084 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5085 if (ctxt->myDoc == NULL) {
5086 xmlErrMemory(ctxt, "New Doc failed");
5087 return;
5088 }
5089 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5090 }
5091 if (ctxt->myDoc->intSubset == NULL)
5092 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5093 BAD_CAST "fake", NULL, NULL);
5094
5095 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5096 NULL, NULL, value);
5097 }
5098 } else {
5099 URI = xmlParseExternalID(ctxt, &literal, 1);
5100 if ((URI == NULL) && (literal == NULL)) {
5101 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5102 }
5103 if (URI) {
5104 xmlURIPtr uri;
5105
5106 uri = xmlParseURI((const char *)URI);
5107 if (uri == NULL) {
5108 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5109 "Invalid URI: %s\n", URI);
5110 /*
5111 * This really ought to be a well formedness error
5112 * but the XML Core WG decided otherwise c.f. issue
5113 * E26 of the XML erratas.
5114 */
5115 } else {
5116 if (uri->fragment != NULL) {
5117 /*
5118 * Okay this is foolish to block those but not
5119 * invalid URIs.
5120 */
5121 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5122 }
5123 xmlFreeURI(uri);
5124 }
5125 }
5126 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5127 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5128 "Space required before 'NDATA'\n");
5129 }
5130 SKIP_BLANKS;
5131 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5132 SKIP(5);
5133 if (!IS_BLANK_CH(CUR)) {
5134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5135 "Space required after 'NDATA'\n");
5136 }
5137 SKIP_BLANKS;
5138 ndata = xmlParseName(ctxt);
5139 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5140 (ctxt->sax->unparsedEntityDecl != NULL))
5141 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5142 literal, URI, ndata);
5143 } else {
5144 if ((ctxt->sax != NULL) &&
5145 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5146 ctxt->sax->entityDecl(ctxt->userData, name,
5147 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5148 literal, URI, NULL);
5149 /*
5150 * For expat compatibility in SAX mode.
5151 * assuming the entity repalcement was asked for
5152 */
5153 if ((ctxt->replaceEntities != 0) &&
5154 ((ctxt->myDoc == NULL) ||
5155 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5156 if (ctxt->myDoc == NULL) {
5157 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5158 if (ctxt->myDoc == NULL) {
5159 xmlErrMemory(ctxt, "New Doc failed");
5160 return;
5161 }
5162 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5163 }
5164
5165 if (ctxt->myDoc->intSubset == NULL)
5166 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5167 BAD_CAST "fake", NULL, NULL);
5168 xmlSAX2EntityDecl(ctxt, name,
5169 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5170 literal, URI, NULL);
5171 }
5172 }
5173 }
5174 }
5175 SKIP_BLANKS;
5176 if (RAW != '>') {
5177 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5178 "xmlParseEntityDecl: entity %s not terminated\n", name);
5179 } else {
5180 if (input != ctxt->input) {
5181 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5182 "Entity declaration doesn't start and stop in the same entity\n");
5183 }
5184 NEXT;
5185 }
5186 if (orig != NULL) {
5187 /*
5188 * Ugly mechanism to save the raw entity value.
5189 */
5190 xmlEntityPtr cur = NULL;
5191
5192 if (isParameter) {
5193 if ((ctxt->sax != NULL) &&
5194 (ctxt->sax->getParameterEntity != NULL))
5195 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5196 } else {
5197 if ((ctxt->sax != NULL) &&
5198 (ctxt->sax->getEntity != NULL))
5199 cur = ctxt->sax->getEntity(ctxt->userData, name);
5200 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5201 cur = xmlSAX2GetEntity(ctxt, name);
5202 }
5203 }
5204 if (cur != NULL) {
5205 if (cur->orig != NULL)
5206 xmlFree(orig);
5207 else
5208 cur->orig = orig;
5209 } else
5210 xmlFree(orig);
5211 }
5212 if (value != NULL) xmlFree(value);
5213 if (URI != NULL) xmlFree(URI);
5214 if (literal != NULL) xmlFree(literal);
5215 }
5216 }
5217
5218 /**
5219 * xmlParseDefaultDecl:
5220 * @ctxt: an XML parser context
5221 * @value: Receive a possible fixed default value for the attribute
5222 *
5223 * Parse an attribute default declaration
5224 *
5225 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5226 *
5227 * [ VC: Required Attribute ]
5228 * if the default declaration is the keyword #REQUIRED, then the
5229 * attribute must be specified for all elements of the type in the
5230 * attribute-list declaration.
5231 *
5232 * [ VC: Attribute Default Legal ]
5233 * The declared default value must meet the lexical constraints of
5234 * the declared attribute type c.f. xmlValidateAttributeDecl()
5235 *
5236 * [ VC: Fixed Attribute Default ]
5237 * if an attribute has a default value declared with the #FIXED
5238 * keyword, instances of that attribute must match the default value.
5239 *
5240 * [ WFC: No < in Attribute Values ]
5241 * handled in xmlParseAttValue()
5242 *
5243 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5244 * or XML_ATTRIBUTE_FIXED.
5245 */
5246
5247 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5248 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5249 int val;
5250 xmlChar *ret;
5251
5252 *value = NULL;
5253 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5254 SKIP(9);
5255 return(XML_ATTRIBUTE_REQUIRED);
5256 }
5257 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5258 SKIP(8);
5259 return(XML_ATTRIBUTE_IMPLIED);
5260 }
5261 val = XML_ATTRIBUTE_NONE;
5262 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5263 SKIP(6);
5264 val = XML_ATTRIBUTE_FIXED;
5265 if (!IS_BLANK_CH(CUR)) {
5266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5267 "Space required after '#FIXED'\n");
5268 }
5269 SKIP_BLANKS;
5270 }
5271 ret = xmlParseAttValue(ctxt);
5272 ctxt->instate = XML_PARSER_DTD;
5273 if (ret == NULL) {
5274 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5275 "Attribute default value declaration error\n");
5276 } else
5277 *value = ret;
5278 return(val);
5279 }
5280
5281 /**
5282 * xmlParseNotationType:
5283 * @ctxt: an XML parser context
5284 *
5285 * parse an Notation attribute type.
5286 *
5287 * Note: the leading 'NOTATION' S part has already being parsed...
5288 *
5289 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5290 *
5291 * [ VC: Notation Attributes ]
5292 * Values of this type must match one of the notation names included
5293 * in the declaration; all notation names in the declaration must be declared.
5294 *
5295 * Returns: the notation attribute tree built while parsing
5296 */
5297
5298 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5299 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5300 const xmlChar *name;
5301 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5302
5303 if (RAW != '(') {
5304 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5305 return(NULL);
5306 }
5307 SHRINK;
5308 do {
5309 NEXT;
5310 SKIP_BLANKS;
5311 name = xmlParseName(ctxt);
5312 if (name == NULL) {
5313 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5314 "Name expected in NOTATION declaration\n");
5315 return(ret);
5316 }
5317 tmp = ret;
5318 while (tmp != NULL) {
5319 if (xmlStrEqual(name, tmp->name)) {
5320 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5321 "standalone: attribute notation value token %s duplicated\n",
5322 name, NULL);
5323 if (!xmlDictOwns(ctxt->dict, name))
5324 xmlFree((xmlChar *) name);
5325 break;
5326 }
5327 tmp = tmp->next;
5328 }
5329 if (tmp == NULL) {
5330 cur = xmlCreateEnumeration(name);
5331 if (cur == NULL) return(ret);
5332 if (last == NULL) ret = last = cur;
5333 else {
5334 last->next = cur;
5335 last = cur;
5336 }
5337 }
5338 SKIP_BLANKS;
5339 } while (RAW == '|');
5340 if (RAW != ')') {
5341 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5342 if ((last != NULL) && (last != ret))
5343 xmlFreeEnumeration(last);
5344 return(ret);
5345 }
5346 NEXT;
5347 return(ret);
5348 }
5349
5350 /**
5351 * xmlParseEnumerationType:
5352 * @ctxt: an XML parser context
5353 *
5354 * parse an Enumeration attribute type.
5355 *
5356 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5357 *
5358 * [ VC: Enumeration ]
5359 * Values of this type must match one of the Nmtoken tokens in
5360 * the declaration
5361 *
5362 * Returns: the enumeration attribute tree built while parsing
5363 */
5364
5365 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5366 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5367 xmlChar *name;
5368 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5369
5370 if (RAW != '(') {
5371 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5372 return(NULL);
5373 }
5374 SHRINK;
5375 do {
5376 NEXT;
5377 SKIP_BLANKS;
5378 name = xmlParseNmtoken(ctxt);
5379 if (name == NULL) {
5380 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5381 return(ret);
5382 }
5383 tmp = ret;
5384 while (tmp != NULL) {
5385 if (xmlStrEqual(name, tmp->name)) {
5386 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5387 "standalone: attribute enumeration value token %s duplicated\n",
5388 name, NULL);
5389 if (!xmlDictOwns(ctxt->dict, name))
5390 xmlFree(name);
5391 break;
5392 }
5393 tmp = tmp->next;
5394 }
5395 if (tmp == NULL) {
5396 cur = xmlCreateEnumeration(name);
5397 if (!xmlDictOwns(ctxt->dict, name))
5398 xmlFree(name);
5399 if (cur == NULL) return(ret);
5400 if (last == NULL) ret = last = cur;
5401 else {
5402 last->next = cur;
5403 last = cur;
5404 }
5405 }
5406 SKIP_BLANKS;
5407 } while (RAW == '|');
5408 if (RAW != ')') {
5409 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5410 return(ret);
5411 }
5412 NEXT;
5413 return(ret);
5414 }
5415
5416 /**
5417 * xmlParseEnumeratedType:
5418 * @ctxt: an XML parser context
5419 * @tree: the enumeration tree built while parsing
5420 *
5421 * parse an Enumerated attribute type.
5422 *
5423 * [57] EnumeratedType ::= NotationType | Enumeration
5424 *
5425 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5426 *
5427 *
5428 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5429 */
5430
5431 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5432 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5433 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5434 SKIP(8);
5435 if (!IS_BLANK_CH(CUR)) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after 'NOTATION'\n");
5438 return(0);
5439 }
5440 SKIP_BLANKS;
5441 *tree = xmlParseNotationType(ctxt);
5442 if (*tree == NULL) return(0);
5443 return(XML_ATTRIBUTE_NOTATION);
5444 }
5445 *tree = xmlParseEnumerationType(ctxt);
5446 if (*tree == NULL) return(0);
5447 return(XML_ATTRIBUTE_ENUMERATION);
5448 }
5449
5450 /**
5451 * xmlParseAttributeType:
5452 * @ctxt: an XML parser context
5453 * @tree: the enumeration tree built while parsing
5454 *
5455 * parse the Attribute list def for an element
5456 *
5457 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5458 *
5459 * [55] StringType ::= 'CDATA'
5460 *
5461 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5462 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5463 *
5464 * Validity constraints for attribute values syntax are checked in
5465 * xmlValidateAttributeValue()
5466 *
5467 * [ VC: ID ]
5468 * Values of type ID must match the Name production. A name must not
5469 * appear more than once in an XML document as a value of this type;
5470 * i.e., ID values must uniquely identify the elements which bear them.
5471 *
5472 * [ VC: One ID per Element Type ]
5473 * No element type may have more than one ID attribute specified.
5474 *
5475 * [ VC: ID Attribute Default ]
5476 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5477 *
5478 * [ VC: IDREF ]
5479 * Values of type IDREF must match the Name production, and values
5480 * of type IDREFS must match Names; each IDREF Name must match the value
5481 * of an ID attribute on some element in the XML document; i.e. IDREF
5482 * values must match the value of some ID attribute.
5483 *
5484 * [ VC: Entity Name ]
5485 * Values of type ENTITY must match the Name production, values
5486 * of type ENTITIES must match Names; each Entity Name must match the
5487 * name of an unparsed entity declared in the DTD.
5488 *
5489 * [ VC: Name Token ]
5490 * Values of type NMTOKEN must match the Nmtoken production; values
5491 * of type NMTOKENS must match Nmtokens.
5492 *
5493 * Returns the attribute type
5494 */
5495 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5496 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5497 SHRINK;
5498 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5499 SKIP(5);
5500 return(XML_ATTRIBUTE_CDATA);
5501 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5502 SKIP(6);
5503 return(XML_ATTRIBUTE_IDREFS);
5504 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5505 SKIP(5);
5506 return(XML_ATTRIBUTE_IDREF);
5507 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5508 SKIP(2);
5509 return(XML_ATTRIBUTE_ID);
5510 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5511 SKIP(6);
5512 return(XML_ATTRIBUTE_ENTITY);
5513 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5514 SKIP(8);
5515 return(XML_ATTRIBUTE_ENTITIES);
5516 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5517 SKIP(8);
5518 return(XML_ATTRIBUTE_NMTOKENS);
5519 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5520 SKIP(7);
5521 return(XML_ATTRIBUTE_NMTOKEN);
5522 }
5523 return(xmlParseEnumeratedType(ctxt, tree));
5524 }
5525
5526 /**
5527 * xmlParseAttributeListDecl:
5528 * @ctxt: an XML parser context
5529 *
5530 * : parse the Attribute list def for an element
5531 *
5532 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5533 *
5534 * [53] AttDef ::= S Name S AttType S DefaultDecl
5535 *
5536 */
5537 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5538 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5539 const xmlChar *elemName;
5540 const xmlChar *attrName;
5541 xmlEnumerationPtr tree;
5542
5543 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5544 xmlParserInputPtr input = ctxt->input;
5545
5546 SKIP(9);
5547 if (!IS_BLANK_CH(CUR)) {
5548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5549 "Space required after '<!ATTLIST'\n");
5550 }
5551 SKIP_BLANKS;
5552 elemName = xmlParseName(ctxt);
5553 if (elemName == NULL) {
5554 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5555 "ATTLIST: no name for Element\n");
5556 return;
5557 }
5558 SKIP_BLANKS;
5559 GROW;
5560 while (RAW != '>') {
5561 const xmlChar *check = CUR_PTR;
5562 int type;
5563 int def;
5564 xmlChar *defaultValue = NULL;
5565
5566 GROW;
5567 tree = NULL;
5568 attrName = xmlParseName(ctxt);
5569 if (attrName == NULL) {
5570 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5571 "ATTLIST: no name for Attribute\n");
5572 break;
5573 }
5574 GROW;
5575 if (!IS_BLANK_CH(CUR)) {
5576 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5577 "Space required after the attribute name\n");
5578 break;
5579 }
5580 SKIP_BLANKS;
5581
5582 type = xmlParseAttributeType(ctxt, &tree);
5583 if (type <= 0) {
5584 break;
5585 }
5586
5587 GROW;
5588 if (!IS_BLANK_CH(CUR)) {
5589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5590 "Space required after the attribute type\n");
5591 if (tree != NULL)
5592 xmlFreeEnumeration(tree);
5593 break;
5594 }
5595 SKIP_BLANKS;
5596
5597 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5598 if (def <= 0) {
5599 if (defaultValue != NULL)
5600 xmlFree(defaultValue);
5601 if (tree != NULL)
5602 xmlFreeEnumeration(tree);
5603 break;
5604 }
5605 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5606 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5607
5608 GROW;
5609 if (RAW != '>') {
5610 if (!IS_BLANK_CH(CUR)) {
5611 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5612 "Space required after the attribute default value\n");
5613 if (defaultValue != NULL)
5614 xmlFree(defaultValue);
5615 if (tree != NULL)
5616 xmlFreeEnumeration(tree);
5617 break;
5618 }
5619 SKIP_BLANKS;
5620 }
5621 if (check == CUR_PTR) {
5622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5623 "in xmlParseAttributeListDecl\n");
5624 if (defaultValue != NULL)
5625 xmlFree(defaultValue);
5626 if (tree != NULL)
5627 xmlFreeEnumeration(tree);
5628 break;
5629 }
5630 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5631 (ctxt->sax->attributeDecl != NULL))
5632 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5633 type, def, defaultValue, tree);
5634 else if (tree != NULL)
5635 xmlFreeEnumeration(tree);
5636
5637 if ((ctxt->sax2) && (defaultValue != NULL) &&
5638 (def != XML_ATTRIBUTE_IMPLIED) &&
5639 (def != XML_ATTRIBUTE_REQUIRED)) {
5640 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5641 }
5642 if (ctxt->sax2) {
5643 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5644 }
5645 if (defaultValue != NULL)
5646 xmlFree(defaultValue);
5647 GROW;
5648 }
5649 if (RAW == '>') {
5650 if (input != ctxt->input) {
5651 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5652 "Attribute list declaration doesn't start and stop in the same entity\n",
5653 NULL, NULL);
5654 }
5655 NEXT;
5656 }
5657 }
5658 }
5659
5660 /**
5661 * xmlParseElementMixedContentDecl:
5662 * @ctxt: an XML parser context
5663 * @inputchk: the input used for the current entity, needed for boundary checks
5664 *
5665 * parse the declaration for a Mixed Element content
5666 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5667 *
5668 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5669 * '(' S? '#PCDATA' S? ')'
5670 *
5671 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5672 *
5673 * [ VC: No Duplicate Types ]
5674 * The same name must not appear more than once in a single
5675 * mixed-content declaration.
5676 *
5677 * returns: the list of the xmlElementContentPtr describing the element choices
5678 */
5679 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5680 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5681 xmlElementContentPtr ret = NULL, cur = NULL, n;
5682 const xmlChar *elem = NULL;
5683
5684 GROW;
5685 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5686 SKIP(7);
5687 SKIP_BLANKS;
5688 SHRINK;
5689 if (RAW == ')') {
5690 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5691 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5692 "Element content declaration doesn't start and stop in the same entity\n",
5693 NULL, NULL);
5694 }
5695 NEXT;
5696 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5697 if (ret == NULL)
5698 return(NULL);
5699 if (RAW == '*') {
5700 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5701 NEXT;
5702 }
5703 return(ret);
5704 }
5705 if ((RAW == '(') || (RAW == '|')) {
5706 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5707 if (ret == NULL) return(NULL);
5708 }
5709 while (RAW == '|') {
5710 NEXT;
5711 if (elem == NULL) {
5712 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5713 if (ret == NULL) return(NULL);
5714 ret->c1 = cur;
5715 if (cur != NULL)
5716 cur->parent = ret;
5717 cur = ret;
5718 } else {
5719 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5720 if (n == NULL) return(NULL);
5721 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5722 if (n->c1 != NULL)
5723 n->c1->parent = n;
5724 cur->c2 = n;
5725 if (n != NULL)
5726 n->parent = cur;
5727 cur = n;
5728 }
5729 SKIP_BLANKS;
5730 elem = xmlParseName(ctxt);
5731 if (elem == NULL) {
5732 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5733 "xmlParseElementMixedContentDecl : Name expected\n");
5734 xmlFreeDocElementContent(ctxt->myDoc, cur);
5735 return(NULL);
5736 }
5737 SKIP_BLANKS;
5738 GROW;
5739 }
5740 if ((RAW == ')') && (NXT(1) == '*')) {
5741 if (elem != NULL) {
5742 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5743 XML_ELEMENT_CONTENT_ELEMENT);
5744 if (cur->c2 != NULL)
5745 cur->c2->parent = cur;
5746 }
5747 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5748 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5749 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5750 "Element content declaration doesn't start and stop in the same entity\n",
5751 NULL, NULL);
5752 }
5753 SKIP(2);
5754 } else {
5755 xmlFreeDocElementContent(ctxt->myDoc, ret);
5756 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5757 return(NULL);
5758 }
5759
5760 } else {
5761 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5762 }
5763 return(ret);
5764 }
5765
5766 /**
5767 * xmlParseElementChildrenContentDecl:
5768 * @ctxt: an XML parser context
5769 * @inputchk: the input used for the current entity, needed for boundary checks
5770 *
5771 * parse the declaration for a Mixed Element content
5772 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5773 *
5774 *
5775 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5776 *
5777 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5778 *
5779 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5780 *
5781 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5782 *
5783 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5784 * TODO Parameter-entity replacement text must be properly nested
5785 * with parenthesized groups. That is to say, if either of the
5786 * opening or closing parentheses in a choice, seq, or Mixed
5787 * construct is contained in the replacement text for a parameter
5788 * entity, both must be contained in the same replacement text. For
5789 * interoperability, if a parameter-entity reference appears in a
5790 * choice, seq, or Mixed construct, its replacement text should not
5791 * be empty, and neither the first nor last non-blank character of
5792 * the replacement text should be a connector (| or ,).
5793 *
5794 * Returns the tree of xmlElementContentPtr describing the element
5795 * hierarchy.
5796 */
5797 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5798 xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5799 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5800 const xmlChar *elem;
5801 xmlChar type = 0;
5802
5803 SKIP_BLANKS;
5804 GROW;
5805 if (RAW == '(') {
5806 int inputid = ctxt->input->id;
5807
5808 /* Recurse on first child */
5809 NEXT;
5810 SKIP_BLANKS;
5811 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5812 SKIP_BLANKS;
5813 GROW;
5814 } else {
5815 elem = xmlParseName(ctxt);
5816 if (elem == NULL) {
5817 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5818 return(NULL);
5819 }
5820 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5821 if (cur == NULL) {
5822 xmlErrMemory(ctxt, NULL);
5823 return(NULL);
5824 }
5825 GROW;
5826 if (RAW == '?') {
5827 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5828 NEXT;
5829 } else if (RAW == '*') {
5830 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5831 NEXT;
5832 } else if (RAW == '+') {
5833 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5834 NEXT;
5835 } else {
5836 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5837 }
5838 GROW;
5839 }
5840 SKIP_BLANKS;
5841 SHRINK;
5842 while (RAW != ')') {
5843 /*
5844 * Each loop we parse one separator and one element.
5845 */
5846 if (RAW == ',') {
5847 if (type == 0) type = CUR;
5848
5849 /*
5850 * Detect "Name | Name , Name" error
5851 */
5852 else if (type != CUR) {
5853 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5854 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5855 type);
5856 if ((last != NULL) && (last != ret))
5857 xmlFreeDocElementContent(ctxt->myDoc, last);
5858 if (ret != NULL)
5859 xmlFreeDocElementContent(ctxt->myDoc, ret);
5860 return(NULL);
5861 }
5862 NEXT;
5863
5864 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5865 if (op == NULL) {
5866 if ((last != NULL) && (last != ret))
5867 xmlFreeDocElementContent(ctxt->myDoc, last);
5868 xmlFreeDocElementContent(ctxt->myDoc, ret);
5869 return(NULL);
5870 }
5871 if (last == NULL) {
5872 op->c1 = ret;
5873 if (ret != NULL)
5874 ret->parent = op;
5875 ret = cur = op;
5876 } else {
5877 cur->c2 = op;
5878 if (op != NULL)
5879 op->parent = cur;
5880 op->c1 = last;
5881 if (last != NULL)
5882 last->parent = op;
5883 cur =op;
5884 last = NULL;
5885 }
5886 } else if (RAW == '|') {
5887 if (type == 0) type = CUR;
5888
5889 /*
5890 * Detect "Name , Name | Name" error
5891 */
5892 else if (type != CUR) {
5893 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5894 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5895 type);
5896 if ((last != NULL) && (last != ret))
5897 xmlFreeDocElementContent(ctxt->myDoc, last);
5898 if (ret != NULL)
5899 xmlFreeDocElementContent(ctxt->myDoc, ret);
5900 return(NULL);
5901 }
5902 NEXT;
5903
5904 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5905 if (op == NULL) {
5906 if ((last != NULL) && (last != ret))
5907 xmlFreeDocElementContent(ctxt->myDoc, last);
5908 if (ret != NULL)
5909 xmlFreeDocElementContent(ctxt->myDoc, ret);
5910 return(NULL);
5911 }
5912 if (last == NULL) {
5913 op->c1 = ret;
5914 if (ret != NULL)
5915 ret->parent = op;
5916 ret = cur = op;
5917 } else {
5918 cur->c2 = op;
5919 if (op != NULL)
5920 op->parent = cur;
5921 op->c1 = last;
5922 if (last != NULL)
5923 last->parent = op;
5924 cur =op;
5925 last = NULL;
5926 }
5927 } else {
5928 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5929 if ((last != NULL) && (last != ret))
5930 xmlFreeDocElementContent(ctxt->myDoc, last);
5931 if (ret != NULL)
5932 xmlFreeDocElementContent(ctxt->myDoc, ret);
5933 return(NULL);
5934 }
5935 GROW;
5936 SKIP_BLANKS;
5937 GROW;
5938 if (RAW == '(') {
5939 int inputid = ctxt->input->id;
5940 /* Recurse on second child */
5941 NEXT;
5942 SKIP_BLANKS;
5943 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5944 SKIP_BLANKS;
5945 } else {
5946 elem = xmlParseName(ctxt);
5947 if (elem == NULL) {
5948 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5949 if (ret != NULL)
5950 xmlFreeDocElementContent(ctxt->myDoc, ret);
5951 return(NULL);
5952 }
5953 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5954 if (last == NULL) {
5955 if (ret != NULL)
5956 xmlFreeDocElementContent(ctxt->myDoc, ret);
5957 return(NULL);
5958 }
5959 if (RAW == '?') {
5960 last->ocur = XML_ELEMENT_CONTENT_OPT;
5961 NEXT;
5962 } else if (RAW == '*') {
5963 last->ocur = XML_ELEMENT_CONTENT_MULT;
5964 NEXT;
5965 } else if (RAW == '+') {
5966 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5967 NEXT;
5968 } else {
5969 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5970 }
5971 }
5972 SKIP_BLANKS;
5973 GROW;
5974 }
5975 if ((cur != NULL) && (last != NULL)) {
5976 cur->c2 = last;
5977 if (last != NULL)
5978 last->parent = cur;
5979 }
5980 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5981 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5982 "Element content declaration doesn't start and stop in the same entity\n",
5983 NULL, NULL);
5984 }
5985 NEXT;
5986 if (RAW == '?') {
5987 if (ret != NULL) {
5988 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5989 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5990 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5991 else
5992 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5993 }
5994 NEXT;
5995 } else if (RAW == '*') {
5996 if (ret != NULL) {
5997 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5998 cur = ret;
5999 /*
6000 * Some normalization:
6001 * (a | b* | c?)* == (a | b | c)*
6002 */
6003 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6004 if ((cur->c1 != NULL) &&
6005 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6006 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6007 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6008 if ((cur->c2 != NULL) &&
6009 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6010 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6011 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6012 cur = cur->c2;
6013 }
6014 }
6015 NEXT;
6016 } else if (RAW == '+') {
6017 if (ret != NULL) {
6018 int found = 0;
6019
6020 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6021 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6022 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6023 else
6024 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6025 /*
6026 * Some normalization:
6027 * (a | b*)+ == (a | b)*
6028 * (a | b?)+ == (a | b)*
6029 */
6030 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6031 if ((cur->c1 != NULL) &&
6032 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6033 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6034 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6035 found = 1;
6036 }
6037 if ((cur->c2 != NULL) &&
6038 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6039 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6040 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6041 found = 1;
6042 }
6043 cur = cur->c2;
6044 }
6045 if (found)
6046 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6047 }
6048 NEXT;
6049 }
6050 return(ret);
6051 }
6052
6053 /**
6054 * xmlParseElementContentDecl:
6055 * @ctxt: an XML parser context
6056 * @name: the name of the element being defined.
6057 * @result: the Element Content pointer will be stored here if any
6058 *
6059 * parse the declaration for an Element content either Mixed or Children,
6060 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6061 *
6062 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6063 *
6064 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6065 */
6066
6067 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6068 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6069 xmlElementContentPtr *result) {
6070
6071 xmlElementContentPtr tree = NULL;
6072 int inputid = ctxt->input->id;
6073 int res;
6074
6075 *result = NULL;
6076
6077 if (RAW != '(') {
6078 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6079 "xmlParseElementContentDecl : %s '(' expected\n", name);
6080 return(-1);
6081 }
6082 NEXT;
6083 GROW;
6084 SKIP_BLANKS;
6085 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6086 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6087 res = XML_ELEMENT_TYPE_MIXED;
6088 } else {
6089 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
6090 res = XML_ELEMENT_TYPE_ELEMENT;
6091 }
6092 SKIP_BLANKS;
6093 *result = tree;
6094 return(res);
6095 }
6096
6097 /**
6098 * xmlParseElementDecl:
6099 * @ctxt: an XML parser context
6100 *
6101 * parse an Element declaration.
6102 *
6103 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6104 *
6105 * [ VC: Unique Element Type Declaration ]
6106 * No element type may be declared more than once
6107 *
6108 * Returns the type of the element, or -1 in case of error
6109 */
6110 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6111 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6112 const xmlChar *name;
6113 int ret = -1;
6114 xmlElementContentPtr content = NULL;
6115
6116 /* GROW; done in the caller */
6117 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6118 xmlParserInputPtr input = ctxt->input;
6119
6120 SKIP(9);
6121 if (!IS_BLANK_CH(CUR)) {
6122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6123 "Space required after 'ELEMENT'\n");
6124 }
6125 SKIP_BLANKS;
6126 name = xmlParseName(ctxt);
6127 if (name == NULL) {
6128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6129 "xmlParseElementDecl: no name for Element\n");
6130 return(-1);
6131 }
6132 while ((RAW == 0) && (ctxt->inputNr > 1))
6133 xmlPopInput(ctxt);
6134 if (!IS_BLANK_CH(CUR)) {
6135 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6136 "Space required after the element name\n");
6137 }
6138 SKIP_BLANKS;
6139 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6140 SKIP(5);
6141 /*
6142 * Element must always be empty.
6143 */
6144 ret = XML_ELEMENT_TYPE_EMPTY;
6145 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6146 (NXT(2) == 'Y')) {
6147 SKIP(3);
6148 /*
6149 * Element is a generic container.
6150 */
6151 ret = XML_ELEMENT_TYPE_ANY;
6152 } else if (RAW == '(') {
6153 ret = xmlParseElementContentDecl(ctxt, name, &content);
6154 } else {
6155 /*
6156 * [ WFC: PEs in Internal Subset ] error handling.
6157 */
6158 if ((RAW == '%') && (ctxt->external == 0) &&
6159 (ctxt->inputNr == 1)) {
6160 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6161 "PEReference: forbidden within markup decl in internal subset\n");
6162 } else {
6163 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6164 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6165 }
6166 return(-1);
6167 }
6168
6169 SKIP_BLANKS;
6170 /*
6171 * Pop-up of finished entities.
6172 */
6173 while ((RAW == 0) && (ctxt->inputNr > 1))
6174 xmlPopInput(ctxt);
6175 SKIP_BLANKS;
6176
6177 if (RAW != '>') {
6178 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6179 if (content != NULL) {
6180 xmlFreeDocElementContent(ctxt->myDoc, content);
6181 }
6182 } else {
6183 if (input != ctxt->input) {
6184 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6185 "Element declaration doesn't start and stop in the same entity\n");
6186 }
6187
6188 NEXT;
6189 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6190 (ctxt->sax->elementDecl != NULL)) {
6191 if (content != NULL)
6192 content->parent = NULL;
6193 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6194 content);
6195 if ((content != NULL) && (content->parent == NULL)) {
6196 /*
6197 * this is a trick: if xmlAddElementDecl is called,
6198 * instead of copying the full tree it is plugged directly
6199 * if called from the parser. Avoid duplicating the
6200 * interfaces or change the API/ABI
6201 */
6202 xmlFreeDocElementContent(ctxt->myDoc, content);
6203 }
6204 } else if (content != NULL) {
6205 xmlFreeDocElementContent(ctxt->myDoc, content);
6206 }
6207 }
6208 }
6209 return(ret);
6210 }
6211
6212 /**
6213 * xmlParseConditionalSections
6214 * @ctxt: an XML parser context
6215 *
6216 * [61] conditionalSect ::= includeSect | ignoreSect
6217 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6218 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6219 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6220 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6221 */
6222
6223 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6224 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6225 int id = ctxt->input->id;
6226
6227 SKIP(3);
6228 SKIP_BLANKS;
6229 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6230 SKIP(7);
6231 SKIP_BLANKS;
6232 if (RAW != '[') {
6233 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6234 } else {
6235 if (ctxt->input->id != id) {
6236 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6237 "All markup of the conditional section is not in the same entity\n",
6238 NULL, NULL);
6239 }
6240 NEXT;
6241 }
6242 if (xmlParserDebugEntities) {
6243 if ((ctxt->input != NULL) && (ctxt->input->filename))
6244 xmlGenericError(xmlGenericErrorContext,
6245 "%s(%d): ", ctxt->input->filename,
6246 ctxt->input->line);
6247 xmlGenericError(xmlGenericErrorContext,
6248 "Entering INCLUDE Conditional Section\n");
6249 }
6250
6251 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6252 (NXT(2) != '>'))) {
6253 const xmlChar *check = CUR_PTR;
6254 unsigned int cons = ctxt->input->consumed;
6255
6256 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6257 xmlParseConditionalSections(ctxt);
6258 } else if (IS_BLANK_CH(CUR)) {
6259 NEXT;
6260 } else if (RAW == '%') {
6261 xmlParsePEReference(ctxt);
6262 } else
6263 xmlParseMarkupDecl(ctxt);
6264
6265 /*
6266 * Pop-up of finished entities.
6267 */
6268 while ((RAW == 0) && (ctxt->inputNr > 1))
6269 xmlPopInput(ctxt);
6270
6271 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6272 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6273 break;
6274 }
6275 }
6276 if (xmlParserDebugEntities) {
6277 if ((ctxt->input != NULL) && (ctxt->input->filename))
6278 xmlGenericError(xmlGenericErrorContext,
6279 "%s(%d): ", ctxt->input->filename,
6280 ctxt->input->line);
6281 xmlGenericError(xmlGenericErrorContext,
6282 "Leaving INCLUDE Conditional Section\n");
6283 }
6284
6285 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6286 int state;
6287 xmlParserInputState instate;
6288 int depth = 0;
6289
6290 SKIP(6);
6291 SKIP_BLANKS;
6292 if (RAW != '[') {
6293 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6294 } else {
6295 if (ctxt->input->id != id) {
6296 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6297 "All markup of the conditional section is not in the same entity\n",
6298 NULL, NULL);
6299 }
6300 NEXT;
6301 }
6302 if (xmlParserDebugEntities) {
6303 if ((ctxt->input != NULL) && (ctxt->input->filename))
6304 xmlGenericError(xmlGenericErrorContext,
6305 "%s(%d): ", ctxt->input->filename,
6306 ctxt->input->line);
6307 xmlGenericError(xmlGenericErrorContext,
6308 "Entering IGNORE Conditional Section\n");
6309 }
6310
6311 /*
6312 * Parse up to the end of the conditional section
6313 * But disable SAX event generating DTD building in the meantime
6314 */
6315 state = ctxt->disableSAX;
6316 instate = ctxt->instate;
6317 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6318 ctxt->instate = XML_PARSER_IGNORE;
6319
6320 while ((depth >= 0) && (RAW != 0)) {
6321 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6322 depth++;
6323 SKIP(3);
6324 continue;
6325 }
6326 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6327 if (--depth >= 0) SKIP(3);
6328 continue;
6329 }
6330 NEXT;
6331 continue;
6332 }
6333
6334 ctxt->disableSAX = state;
6335 ctxt->instate = instate;
6336
6337 if (xmlParserDebugEntities) {
6338 if ((ctxt->input != NULL) && (ctxt->input->filename))
6339 xmlGenericError(xmlGenericErrorContext,
6340 "%s(%d): ", ctxt->input->filename,
6341 ctxt->input->line);
6342 xmlGenericError(xmlGenericErrorContext,
6343 "Leaving IGNORE Conditional Section\n");
6344 }
6345
6346 } else {
6347 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6348 }
6349
6350 if (RAW == 0)
6351 SHRINK;
6352
6353 if (RAW == 0) {
6354 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6355 } else {
6356 if (ctxt->input->id != id) {
6357 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6358 "All markup of the conditional section is not in the same entity\n",
6359 NULL, NULL);
6360 }
6361 SKIP(3);
6362 }
6363 }
6364
6365 /**
6366 * xmlParseMarkupDecl:
6367 * @ctxt: an XML parser context
6368 *
6369 * parse Markup declarations
6370 *
6371 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6372 * NotationDecl | PI | Comment
6373 *
6374 * [ VC: Proper Declaration/PE Nesting ]
6375 * Parameter-entity replacement text must be properly nested with
6376 * markup declarations. That is to say, if either the first character
6377 * or the last character of a markup declaration (markupdecl above) is
6378 * contained in the replacement text for a parameter-entity reference,
6379 * both must be contained in the same replacement text.
6380 *
6381 * [ WFC: PEs in Internal Subset ]
6382 * In the internal DTD subset, parameter-entity references can occur
6383 * only where markup declarations can occur, not within markup declarations.
6384 * (This does not apply to references that occur in external parameter
6385 * entities or to the external subset.)
6386 */
6387 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6388 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6389 GROW;
6390 if (CUR == '<') {
6391 if (NXT(1) == '!') {
6392 switch (NXT(2)) {
6393 case 'E':
6394 if (NXT(3) == 'L')
6395 xmlParseElementDecl(ctxt);
6396 else if (NXT(3) == 'N')
6397 xmlParseEntityDecl(ctxt);
6398 break;
6399 case 'A':
6400 xmlParseAttributeListDecl(ctxt);
6401 break;
6402 case 'N':
6403 xmlParseNotationDecl(ctxt);
6404 break;
6405 case '-':
6406 xmlParseComment(ctxt);
6407 break;
6408 default:
6409 /* there is an error but it will be detected later */
6410 break;
6411 }
6412 } else if (NXT(1) == '?') {
6413 xmlParsePI(ctxt);
6414 }
6415 }
6416 /*
6417 * This is only for internal subset. On external entities,
6418 * the replacement is done before parsing stage
6419 */
6420 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6421 xmlParsePEReference(ctxt);
6422
6423 /*
6424 * Conditional sections are allowed from entities included
6425 * by PE References in the internal subset.
6426 */
6427 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6428 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6429 xmlParseConditionalSections(ctxt);
6430 }
6431 }
6432
6433 ctxt->instate = XML_PARSER_DTD;
6434 }
6435
6436 /**
6437 * xmlParseTextDecl:
6438 * @ctxt: an XML parser context
6439 *
6440 * parse an XML declaration header for external entities
6441 *
6442 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6443 */
6444
6445 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6446 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6447 xmlChar *version;
6448 const xmlChar *encoding;
6449
6450 /*
6451 * We know that '<?xml' is here.
6452 */
6453 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6454 SKIP(5);
6455 } else {
6456 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6457 return;
6458 }
6459
6460 if (!IS_BLANK_CH(CUR)) {
6461 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6462 "Space needed after '<?xml'\n");
6463 }
6464 SKIP_BLANKS;
6465
6466 /*
6467 * We may have the VersionInfo here.
6468 */
6469 version = xmlParseVersionInfo(ctxt);
6470 if (version == NULL)
6471 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6472 else {
6473 if (!IS_BLANK_CH(CUR)) {
6474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6475 "Space needed here\n");
6476 }
6477 }
6478 ctxt->input->version = version;
6479
6480 /*
6481 * We must have the encoding declaration
6482 */
6483 encoding = xmlParseEncodingDecl(ctxt);
6484 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6485 /*
6486 * The XML REC instructs us to stop parsing right here
6487 */
6488 return;
6489 }
6490 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6491 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6492 "Missing encoding in text declaration\n");
6493 }
6494
6495 SKIP_BLANKS;
6496 if ((RAW == '?') && (NXT(1) == '>')) {
6497 SKIP(2);
6498 } else if (RAW == '>') {
6499 /* Deprecated old WD ... */
6500 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6501 NEXT;
6502 } else {
6503 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6504 MOVETO_ENDTAG(CUR_PTR);
6505 NEXT;
6506 }
6507 }
6508
6509 /**
6510 * xmlParseExternalSubset:
6511 * @ctxt: an XML parser context
6512 * @ExternalID: the external identifier
6513 * @SystemID: the system identifier (or URL)
6514 *
6515 * parse Markup declarations from an external subset
6516 *
6517 * [30] extSubset ::= textDecl? extSubsetDecl
6518 *
6519 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6520 */
6521 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6522 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6523 const xmlChar *SystemID) {
6524 xmlDetectSAX2(ctxt);
6525 GROW;
6526
6527 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6528 (ctxt->input->end - ctxt->input->cur >= 4)) {
6529 xmlChar start[4];
6530 xmlCharEncoding enc;
6531
6532 start[0] = RAW;
6533 start[1] = NXT(1);
6534 start[2] = NXT(2);
6535 start[3] = NXT(3);
6536 enc = xmlDetectCharEncoding(start, 4);
6537 if (enc != XML_CHAR_ENCODING_NONE)
6538 xmlSwitchEncoding(ctxt, enc);
6539 }
6540
6541 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6542 xmlParseTextDecl(ctxt);
6543 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6544 /*
6545 * The XML REC instructs us to stop parsing right here
6546 */
6547 ctxt->instate = XML_PARSER_EOF;
6548 return;
6549 }
6550 }
6551 if (ctxt->myDoc == NULL) {
6552 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6553 if (ctxt->myDoc == NULL) {
6554 xmlErrMemory(ctxt, "New Doc failed");
6555 return;
6556 }
6557 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6558 }
6559 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6560 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6561
6562 ctxt->instate = XML_PARSER_DTD;
6563 ctxt->external = 1;
6564 while (((RAW == '<') && (NXT(1) == '?')) ||
6565 ((RAW == '<') && (NXT(1) == '!')) ||
6566 (RAW == '%') || IS_BLANK_CH(CUR)) {
6567 const xmlChar *check = CUR_PTR;
6568 unsigned int cons = ctxt->input->consumed;
6569
6570 GROW;
6571 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6572 xmlParseConditionalSections(ctxt);
6573 } else if (IS_BLANK_CH(CUR)) {
6574 NEXT;
6575 } else if (RAW == '%') {
6576 xmlParsePEReference(ctxt);
6577 } else
6578 xmlParseMarkupDecl(ctxt);
6579
6580 /*
6581 * Pop-up of finished entities.
6582 */
6583 while ((RAW == 0) && (ctxt->inputNr > 1))
6584 xmlPopInput(ctxt);
6585
6586 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6587 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6588 break;
6589 }
6590 }
6591
6592 if (RAW != 0) {
6593 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6594 }
6595
6596 }
6597
6598 /**
6599 * xmlParseReference:
6600 * @ctxt: an XML parser context
6601 *
6602 * parse and handle entity references in content, depending on the SAX
6603 * interface, this may end-up in a call to character() if this is a
6604 * CharRef, a predefined entity, if there is no reference() callback.
6605 * or if the parser was asked to switch to that mode.
6606 *
6607 * [67] Reference ::= EntityRef | CharRef
6608 */
6609 void
xmlParseReference(xmlParserCtxtPtr ctxt)6610 xmlParseReference(xmlParserCtxtPtr ctxt) {
6611 xmlEntityPtr ent;
6612 xmlChar *val;
6613 int was_checked;
6614 xmlNodePtr list = NULL;
6615 xmlParserErrors ret = XML_ERR_OK;
6616
6617
6618 if (RAW != '&')
6619 return;
6620
6621 /*
6622 * Simple case of a CharRef
6623 */
6624 if (NXT(1) == '#') {
6625 int i = 0;
6626 xmlChar out[10];
6627 int hex = NXT(2);
6628 int value = xmlParseCharRef(ctxt);
6629
6630 if (value == 0)
6631 return;
6632 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6633 /*
6634 * So we are using non-UTF-8 buffers
6635 * Check that the char fit on 8bits, if not
6636 * generate a CharRef.
6637 */
6638 if (value <= 0xFF) {
6639 out[0] = value;
6640 out[1] = 0;
6641 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6642 (!ctxt->disableSAX))
6643 ctxt->sax->characters(ctxt->userData, out, 1);
6644 } else {
6645 if ((hex == 'x') || (hex == 'X'))
6646 snprintf((char *)out, sizeof(out), "#x%X", value);
6647 else
6648 snprintf((char *)out, sizeof(out), "#%d", value);
6649 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6650 (!ctxt->disableSAX))
6651 ctxt->sax->reference(ctxt->userData, out);
6652 }
6653 } else {
6654 /*
6655 * Just encode the value in UTF-8
6656 */
6657 COPY_BUF(0 ,out, i, value);
6658 out[i] = 0;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6660 (!ctxt->disableSAX))
6661 ctxt->sax->characters(ctxt->userData, out, i);
6662 }
6663 return;
6664 }
6665
6666 /*
6667 * We are seeing an entity reference
6668 */
6669 ent = xmlParseEntityRef(ctxt);
6670 if (ent == NULL) return;
6671 if (!ctxt->wellFormed)
6672 return;
6673 was_checked = ent->checked;
6674
6675 /* special case of predefined entities */
6676 if ((ent->name == NULL) ||
6677 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6678 val = ent->content;
6679 if (val == NULL) return;
6680 /*
6681 * inline the entity.
6682 */
6683 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6684 (!ctxt->disableSAX))
6685 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6686 return;
6687 }
6688
6689 /*
6690 * The first reference to the entity trigger a parsing phase
6691 * where the ent->children is filled with the result from
6692 * the parsing.
6693 */
6694 if (ent->checked == 0) {
6695 unsigned long oldnbent = ctxt->nbentities;
6696
6697 /*
6698 * This is a bit hackish but this seems the best
6699 * way to make sure both SAX and DOM entity support
6700 * behaves okay.
6701 */
6702 void *user_data;
6703 if (ctxt->userData == ctxt)
6704 user_data = NULL;
6705 else
6706 user_data = ctxt->userData;
6707
6708 /*
6709 * Check that this entity is well formed
6710 * 4.3.2: An internal general parsed entity is well-formed
6711 * if its replacement text matches the production labeled
6712 * content.
6713 */
6714 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6715 ctxt->depth++;
6716 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6717 user_data, &list);
6718 ctxt->depth--;
6719
6720 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6721 ctxt->depth++;
6722 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6723 user_data, ctxt->depth, ent->URI,
6724 ent->ExternalID, &list);
6725 ctxt->depth--;
6726 } else {
6727 ret = XML_ERR_ENTITY_PE_INTERNAL;
6728 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6729 "invalid entity type found\n", NULL);
6730 }
6731
6732 /*
6733 * Store the number of entities needing parsing for this entity
6734 * content and do checkings
6735 */
6736 ent->checked = ctxt->nbentities - oldnbent;
6737 if (ret == XML_ERR_ENTITY_LOOP) {
6738 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6739 xmlFreeNodeList(list);
6740 return;
6741 }
6742 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6743 xmlFreeNodeList(list);
6744 return;
6745 }
6746
6747 if ((ret == XML_ERR_OK) && (list != NULL)) {
6748 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6749 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6750 (ent->children == NULL)) {
6751 ent->children = list;
6752 if (ctxt->replaceEntities) {
6753 /*
6754 * Prune it directly in the generated document
6755 * except for single text nodes.
6756 */
6757 if (((list->type == XML_TEXT_NODE) &&
6758 (list->next == NULL)) ||
6759 (ctxt->parseMode == XML_PARSE_READER)) {
6760 list->parent = (xmlNodePtr) ent;
6761 list = NULL;
6762 ent->owner = 1;
6763 } else {
6764 ent->owner = 0;
6765 while (list != NULL) {
6766 list->parent = (xmlNodePtr) ctxt->node;
6767 list->doc = ctxt->myDoc;
6768 if (list->next == NULL)
6769 ent->last = list;
6770 list = list->next;
6771 }
6772 list = ent->children;
6773 #ifdef LIBXML_LEGACY_ENABLED
6774 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6775 xmlAddEntityReference(ent, list, NULL);
6776 #endif /* LIBXML_LEGACY_ENABLED */
6777 }
6778 } else {
6779 ent->owner = 1;
6780 while (list != NULL) {
6781 list->parent = (xmlNodePtr) ent;
6782 if (list->next == NULL)
6783 ent->last = list;
6784 list = list->next;
6785 }
6786 }
6787 } else {
6788 xmlFreeNodeList(list);
6789 list = NULL;
6790 }
6791 } else if ((ret != XML_ERR_OK) &&
6792 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6793 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6794 "Entity '%s' failed to parse\n", ent->name);
6795 } else if (list != NULL) {
6796 xmlFreeNodeList(list);
6797 list = NULL;
6798 }
6799 if (ent->checked == 0)
6800 ent->checked = 1;
6801 } else if (ent->checked != 1) {
6802 ctxt->nbentities += ent->checked;
6803 }
6804
6805 /*
6806 * Now that the entity content has been gathered
6807 * provide it to the application, this can take different forms based
6808 * on the parsing modes.
6809 */
6810 if (ent->children == NULL) {
6811 /*
6812 * Probably running in SAX mode and the callbacks don't
6813 * build the entity content. So unless we already went
6814 * though parsing for first checking go though the entity
6815 * content to generate callbacks associated to the entity
6816 */
6817 if (was_checked != 0) {
6818 void *user_data;
6819 /*
6820 * This is a bit hackish but this seems the best
6821 * way to make sure both SAX and DOM entity support
6822 * behaves okay.
6823 */
6824 if (ctxt->userData == ctxt)
6825 user_data = NULL;
6826 else
6827 user_data = ctxt->userData;
6828
6829 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6830 ctxt->depth++;
6831 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6832 ent->content, user_data, NULL);
6833 ctxt->depth--;
6834 } else if (ent->etype ==
6835 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6836 ctxt->depth++;
6837 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6838 ctxt->sax, user_data, ctxt->depth,
6839 ent->URI, ent->ExternalID, NULL);
6840 ctxt->depth--;
6841 } else {
6842 ret = XML_ERR_ENTITY_PE_INTERNAL;
6843 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6844 "invalid entity type found\n", NULL);
6845 }
6846 if (ret == XML_ERR_ENTITY_LOOP) {
6847 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6848 return;
6849 }
6850 }
6851 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6852 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6853 /*
6854 * Entity reference callback comes second, it's somewhat
6855 * superfluous but a compatibility to historical behaviour
6856 */
6857 ctxt->sax->reference(ctxt->userData, ent->name);
6858 }
6859 return;
6860 }
6861
6862 /*
6863 * If we didn't get any children for the entity being built
6864 */
6865 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6866 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6867 /*
6868 * Create a node.
6869 */
6870 ctxt->sax->reference(ctxt->userData, ent->name);
6871 return;
6872 }
6873
6874 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6875 /*
6876 * There is a problem on the handling of _private for entities
6877 * (bug 155816): Should we copy the content of the field from
6878 * the entity (possibly overwriting some value set by the user
6879 * when a copy is created), should we leave it alone, or should
6880 * we try to take care of different situations? The problem
6881 * is exacerbated by the usage of this field by the xmlReader.
6882 * To fix this bug, we look at _private on the created node
6883 * and, if it's NULL, we copy in whatever was in the entity.
6884 * If it's not NULL we leave it alone. This is somewhat of a
6885 * hack - maybe we should have further tests to determine
6886 * what to do.
6887 */
6888 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6889 /*
6890 * Seems we are generating the DOM content, do
6891 * a simple tree copy for all references except the first
6892 * In the first occurrence list contains the replacement.
6893 * progressive == 2 means we are operating on the Reader
6894 * and since nodes are discarded we must copy all the time.
6895 */
6896 if (((list == NULL) && (ent->owner == 0)) ||
6897 (ctxt->parseMode == XML_PARSE_READER)) {
6898 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6899
6900 /*
6901 * when operating on a reader, the entities definitions
6902 * are always owning the entities subtree.
6903 if (ctxt->parseMode == XML_PARSE_READER)
6904 ent->owner = 1;
6905 */
6906
6907 cur = ent->children;
6908 while (cur != NULL) {
6909 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6910 if (nw != NULL) {
6911 if (nw->_private == NULL)
6912 nw->_private = cur->_private;
6913 if (firstChild == NULL){
6914 firstChild = nw;
6915 }
6916 nw = xmlAddChild(ctxt->node, nw);
6917 }
6918 if (cur == ent->last) {
6919 /*
6920 * needed to detect some strange empty
6921 * node cases in the reader tests
6922 */
6923 if ((ctxt->parseMode == XML_PARSE_READER) &&
6924 (nw != NULL) &&
6925 (nw->type == XML_ELEMENT_NODE) &&
6926 (nw->children == NULL))
6927 nw->extra = 1;
6928
6929 break;
6930 }
6931 cur = cur->next;
6932 }
6933 #ifdef LIBXML_LEGACY_ENABLED
6934 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6935 xmlAddEntityReference(ent, firstChild, nw);
6936 #endif /* LIBXML_LEGACY_ENABLED */
6937 } else if (list == NULL) {
6938 xmlNodePtr nw = NULL, cur, next, last,
6939 firstChild = NULL;
6940 /*
6941 * Copy the entity child list and make it the new
6942 * entity child list. The goal is to make sure any
6943 * ID or REF referenced will be the one from the
6944 * document content and not the entity copy.
6945 */
6946 cur = ent->children;
6947 ent->children = NULL;
6948 last = ent->last;
6949 ent->last = NULL;
6950 while (cur != NULL) {
6951 next = cur->next;
6952 cur->next = NULL;
6953 cur->parent = NULL;
6954 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6955 if (nw != NULL) {
6956 if (nw->_private == NULL)
6957 nw->_private = cur->_private;
6958 if (firstChild == NULL){
6959 firstChild = cur;
6960 }
6961 xmlAddChild((xmlNodePtr) ent, nw);
6962 xmlAddChild(ctxt->node, cur);
6963 }
6964 if (cur == last)
6965 break;
6966 cur = next;
6967 }
6968 if (ent->owner == 0)
6969 ent->owner = 1;
6970 #ifdef LIBXML_LEGACY_ENABLED
6971 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6972 xmlAddEntityReference(ent, firstChild, nw);
6973 #endif /* LIBXML_LEGACY_ENABLED */
6974 } else {
6975 const xmlChar *nbktext;
6976
6977 /*
6978 * the name change is to avoid coalescing of the
6979 * node with a possible previous text one which
6980 * would make ent->children a dangling pointer
6981 */
6982 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6983 -1);
6984 if (ent->children->type == XML_TEXT_NODE)
6985 ent->children->name = nbktext;
6986 if ((ent->last != ent->children) &&
6987 (ent->last->type == XML_TEXT_NODE))
6988 ent->last->name = nbktext;
6989 xmlAddChildList(ctxt->node, ent->children);
6990 }
6991
6992 /*
6993 * This is to avoid a nasty side effect, see
6994 * characters() in SAX.c
6995 */
6996 ctxt->nodemem = 0;
6997 ctxt->nodelen = 0;
6998 return;
6999 }
7000 }
7001 }
7002
7003 /**
7004 * xmlParseEntityRef:
7005 * @ctxt: an XML parser context
7006 *
7007 * parse ENTITY references declarations
7008 *
7009 * [68] EntityRef ::= '&' Name ';'
7010 *
7011 * [ WFC: Entity Declared ]
7012 * In a document without any DTD, a document with only an internal DTD
7013 * subset which contains no parameter entity references, or a document
7014 * with "standalone='yes'", the Name given in the entity reference
7015 * must match that in an entity declaration, except that well-formed
7016 * documents need not declare any of the following entities: amp, lt,
7017 * gt, apos, quot. The declaration of a parameter entity must precede
7018 * any reference to it. Similarly, the declaration of a general entity
7019 * must precede any reference to it which appears in a default value in an
7020 * attribute-list declaration. Note that if entities are declared in the
7021 * external subset or in external parameter entities, a non-validating
7022 * processor is not obligated to read and process their declarations;
7023 * for such documents, the rule that an entity must be declared is a
7024 * well-formedness constraint only if standalone='yes'.
7025 *
7026 * [ WFC: Parsed Entity ]
7027 * An entity reference must not contain the name of an unparsed entity
7028 *
7029 * Returns the xmlEntityPtr if found, or NULL otherwise.
7030 */
7031 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7032 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7033 const xmlChar *name;
7034 xmlEntityPtr ent = NULL;
7035
7036 GROW;
7037
7038 if (RAW != '&')
7039 return(NULL);
7040 NEXT;
7041 name = xmlParseName(ctxt);
7042 if (name == NULL) {
7043 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7044 "xmlParseEntityRef: no name\n");
7045 return(NULL);
7046 }
7047 if (RAW != ';') {
7048 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7049 return(NULL);
7050 }
7051 NEXT;
7052
7053 /*
7054 * Predefined entites override any extra definition
7055 */
7056 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7057 ent = xmlGetPredefinedEntity(name);
7058 if (ent != NULL)
7059 return(ent);
7060 }
7061
7062 /*
7063 * Increate the number of entity references parsed
7064 */
7065 ctxt->nbentities++;
7066
7067 /*
7068 * Ask first SAX for entity resolution, otherwise try the
7069 * entities which may have stored in the parser context.
7070 */
7071 if (ctxt->sax != NULL) {
7072 if (ctxt->sax->getEntity != NULL)
7073 ent = ctxt->sax->getEntity(ctxt->userData, name);
7074 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7075 (ctxt->options & XML_PARSE_OLDSAX))
7076 ent = xmlGetPredefinedEntity(name);
7077 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7078 (ctxt->userData==ctxt)) {
7079 ent = xmlSAX2GetEntity(ctxt, name);
7080 }
7081 }
7082 /*
7083 * [ WFC: Entity Declared ]
7084 * In a document without any DTD, a document with only an
7085 * internal DTD subset which contains no parameter entity
7086 * references, or a document with "standalone='yes'", the
7087 * Name given in the entity reference must match that in an
7088 * entity declaration, except that well-formed documents
7089 * need not declare any of the following entities: amp, lt,
7090 * gt, apos, quot.
7091 * The declaration of a parameter entity must precede any
7092 * reference to it.
7093 * Similarly, the declaration of a general entity must
7094 * precede any reference to it which appears in a default
7095 * value in an attribute-list declaration. Note that if
7096 * entities are declared in the external subset or in
7097 * external parameter entities, a non-validating processor
7098 * is not obligated to read and process their declarations;
7099 * for such documents, the rule that an entity must be
7100 * declared is a well-formedness constraint only if
7101 * standalone='yes'.
7102 */
7103 if (ent == NULL) {
7104 if ((ctxt->standalone == 1) ||
7105 ((ctxt->hasExternalSubset == 0) &&
7106 (ctxt->hasPErefs == 0))) {
7107 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7108 "Entity '%s' not defined\n", name);
7109 } else {
7110 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7111 "Entity '%s' not defined\n", name);
7112 if ((ctxt->inSubset == 0) &&
7113 (ctxt->sax != NULL) &&
7114 (ctxt->sax->reference != NULL)) {
7115 ctxt->sax->reference(ctxt->userData, name);
7116 }
7117 }
7118 ctxt->valid = 0;
7119 }
7120
7121 /*
7122 * [ WFC: Parsed Entity ]
7123 * An entity reference must not contain the name of an
7124 * unparsed entity
7125 */
7126 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7127 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7128 "Entity reference to unparsed entity %s\n", name);
7129 }
7130
7131 /*
7132 * [ WFC: No External Entity References ]
7133 * Attribute values cannot contain direct or indirect
7134 * entity references to external entities.
7135 */
7136 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7137 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7138 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7139 "Attribute references external entity '%s'\n", name);
7140 }
7141 /*
7142 * [ WFC: No < in Attribute Values ]
7143 * The replacement text of any entity referred to directly or
7144 * indirectly in an attribute value (other than "<") must
7145 * not contain a <.
7146 */
7147 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7148 (ent != NULL) && (ent->content != NULL) &&
7149 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7150 (xmlStrchr(ent->content, '<'))) {
7151 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7152 "'<' in entity '%s' is not allowed in attributes values\n", name);
7153 }
7154
7155 /*
7156 * Internal check, no parameter entities here ...
7157 */
7158 else {
7159 switch (ent->etype) {
7160 case XML_INTERNAL_PARAMETER_ENTITY:
7161 case XML_EXTERNAL_PARAMETER_ENTITY:
7162 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7163 "Attempt to reference the parameter entity '%s'\n",
7164 name);
7165 break;
7166 default:
7167 break;
7168 }
7169 }
7170
7171 /*
7172 * [ WFC: No Recursion ]
7173 * A parsed entity must not contain a recursive reference
7174 * to itself, either directly or indirectly.
7175 * Done somewhere else
7176 */
7177 return(ent);
7178 }
7179
7180 /**
7181 * xmlParseStringEntityRef:
7182 * @ctxt: an XML parser context
7183 * @str: a pointer to an index in the string
7184 *
7185 * parse ENTITY references declarations, but this version parses it from
7186 * a string value.
7187 *
7188 * [68] EntityRef ::= '&' Name ';'
7189 *
7190 * [ WFC: Entity Declared ]
7191 * In a document without any DTD, a document with only an internal DTD
7192 * subset which contains no parameter entity references, or a document
7193 * with "standalone='yes'", the Name given in the entity reference
7194 * must match that in an entity declaration, except that well-formed
7195 * documents need not declare any of the following entities: amp, lt,
7196 * gt, apos, quot. The declaration of a parameter entity must precede
7197 * any reference to it. Similarly, the declaration of a general entity
7198 * must precede any reference to it which appears in a default value in an
7199 * attribute-list declaration. Note that if entities are declared in the
7200 * external subset or in external parameter entities, a non-validating
7201 * processor is not obligated to read and process their declarations;
7202 * for such documents, the rule that an entity must be declared is a
7203 * well-formedness constraint only if standalone='yes'.
7204 *
7205 * [ WFC: Parsed Entity ]
7206 * An entity reference must not contain the name of an unparsed entity
7207 *
7208 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7209 * is updated to the current location in the string.
7210 */
7211 xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7212 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7213 xmlChar *name;
7214 const xmlChar *ptr;
7215 xmlChar cur;
7216 xmlEntityPtr ent = NULL;
7217
7218 if ((str == NULL) || (*str == NULL))
7219 return(NULL);
7220 ptr = *str;
7221 cur = *ptr;
7222 if (cur != '&')
7223 return(NULL);
7224
7225 ptr++;
7226 cur = *ptr;
7227 name = xmlParseStringName(ctxt, &ptr);
7228 if (name == NULL) {
7229 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7230 "xmlParseStringEntityRef: no name\n");
7231 *str = ptr;
7232 return(NULL);
7233 }
7234 if (*ptr != ';') {
7235 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7236 xmlFree(name);
7237 *str = ptr;
7238 return(NULL);
7239 }
7240 ptr++;
7241
7242
7243 /*
7244 * Predefined entites override any extra definition
7245 */
7246 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7247 ent = xmlGetPredefinedEntity(name);
7248 if (ent != NULL) {
7249 xmlFree(name);
7250 *str = ptr;
7251 return(ent);
7252 }
7253 }
7254
7255 /*
7256 * Increate the number of entity references parsed
7257 */
7258 ctxt->nbentities++;
7259
7260 /*
7261 * Ask first SAX for entity resolution, otherwise try the
7262 * entities which may have stored in the parser context.
7263 */
7264 if (ctxt->sax != NULL) {
7265 if (ctxt->sax->getEntity != NULL)
7266 ent = ctxt->sax->getEntity(ctxt->userData, name);
7267 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7268 ent = xmlGetPredefinedEntity(name);
7269 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7270 ent = xmlSAX2GetEntity(ctxt, name);
7271 }
7272 }
7273
7274 /*
7275 * [ WFC: Entity Declared ]
7276 * In a document without any DTD, a document with only an
7277 * internal DTD subset which contains no parameter entity
7278 * references, or a document with "standalone='yes'", the
7279 * Name given in the entity reference must match that in an
7280 * entity declaration, except that well-formed documents
7281 * need not declare any of the following entities: amp, lt,
7282 * gt, apos, quot.
7283 * The declaration of a parameter entity must precede any
7284 * reference to it.
7285 * Similarly, the declaration of a general entity must
7286 * precede any reference to it which appears in a default
7287 * value in an attribute-list declaration. Note that if
7288 * entities are declared in the external subset or in
7289 * external parameter entities, a non-validating processor
7290 * is not obligated to read and process their declarations;
7291 * for such documents, the rule that an entity must be
7292 * declared is a well-formedness constraint only if
7293 * standalone='yes'.
7294 */
7295 if (ent == NULL) {
7296 if ((ctxt->standalone == 1) ||
7297 ((ctxt->hasExternalSubset == 0) &&
7298 (ctxt->hasPErefs == 0))) {
7299 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7300 "Entity '%s' not defined\n", name);
7301 } else {
7302 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7303 "Entity '%s' not defined\n",
7304 name);
7305 }
7306 /* TODO ? check regressions ctxt->valid = 0; */
7307 }
7308
7309 /*
7310 * [ WFC: Parsed Entity ]
7311 * An entity reference must not contain the name of an
7312 * unparsed entity
7313 */
7314 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7315 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7316 "Entity reference to unparsed entity %s\n", name);
7317 }
7318
7319 /*
7320 * [ WFC: No External Entity References ]
7321 * Attribute values cannot contain direct or indirect
7322 * entity references to external entities.
7323 */
7324 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7325 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7326 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7327 "Attribute references external entity '%s'\n", name);
7328 }
7329 /*
7330 * [ WFC: No < in Attribute Values ]
7331 * The replacement text of any entity referred to directly or
7332 * indirectly in an attribute value (other than "<") must
7333 * not contain a <.
7334 */
7335 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7336 (ent != NULL) && (ent->content != NULL) &&
7337 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7338 (xmlStrchr(ent->content, '<'))) {
7339 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7340 "'<' in entity '%s' is not allowed in attributes values\n",
7341 name);
7342 }
7343
7344 /*
7345 * Internal check, no parameter entities here ...
7346 */
7347 else {
7348 switch (ent->etype) {
7349 case XML_INTERNAL_PARAMETER_ENTITY:
7350 case XML_EXTERNAL_PARAMETER_ENTITY:
7351 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7352 "Attempt to reference the parameter entity '%s'\n",
7353 name);
7354 break;
7355 default:
7356 break;
7357 }
7358 }
7359
7360 /*
7361 * [ WFC: No Recursion ]
7362 * A parsed entity must not contain a recursive reference
7363 * to itself, either directly or indirectly.
7364 * Done somewhere else
7365 */
7366
7367 xmlFree(name);
7368 *str = ptr;
7369 return(ent);
7370 }
7371
7372 /**
7373 * xmlParsePEReference:
7374 * @ctxt: an XML parser context
7375 *
7376 * parse PEReference declarations
7377 * The entity content is handled directly by pushing it's content as
7378 * a new input stream.
7379 *
7380 * [69] PEReference ::= '%' Name ';'
7381 *
7382 * [ WFC: No Recursion ]
7383 * A parsed entity must not contain a recursive
7384 * reference to itself, either directly or indirectly.
7385 *
7386 * [ WFC: Entity Declared ]
7387 * In a document without any DTD, a document with only an internal DTD
7388 * subset which contains no parameter entity references, or a document
7389 * with "standalone='yes'", ... ... The declaration of a parameter
7390 * entity must precede any reference to it...
7391 *
7392 * [ VC: Entity Declared ]
7393 * In a document with an external subset or external parameter entities
7394 * with "standalone='no'", ... ... The declaration of a parameter entity
7395 * must precede any reference to it...
7396 *
7397 * [ WFC: In DTD ]
7398 * Parameter-entity references may only appear in the DTD.
7399 * NOTE: misleading but this is handled.
7400 */
7401 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7402 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7403 {
7404 const xmlChar *name;
7405 xmlEntityPtr entity = NULL;
7406 xmlParserInputPtr input;
7407
7408 if (RAW != '%')
7409 return;
7410 NEXT;
7411 name = xmlParseName(ctxt);
7412 if (name == NULL) {
7413 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7414 "xmlParsePEReference: no name\n");
7415 return;
7416 }
7417 if (RAW != ';') {
7418 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7419 return;
7420 }
7421
7422 NEXT;
7423
7424 /*
7425 * Increate the number of entity references parsed
7426 */
7427 ctxt->nbentities++;
7428
7429 /*
7430 * Request the entity from SAX
7431 */
7432 if ((ctxt->sax != NULL) &&
7433 (ctxt->sax->getParameterEntity != NULL))
7434 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7435 name);
7436 if (entity == NULL) {
7437 /*
7438 * [ WFC: Entity Declared ]
7439 * In a document without any DTD, a document with only an
7440 * internal DTD subset which contains no parameter entity
7441 * references, or a document with "standalone='yes'", ...
7442 * ... The declaration of a parameter entity must precede
7443 * any reference to it...
7444 */
7445 if ((ctxt->standalone == 1) ||
7446 ((ctxt->hasExternalSubset == 0) &&
7447 (ctxt->hasPErefs == 0))) {
7448 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7449 "PEReference: %%%s; not found\n",
7450 name);
7451 } else {
7452 /*
7453 * [ VC: Entity Declared ]
7454 * In a document with an external subset or external
7455 * parameter entities with "standalone='no'", ...
7456 * ... The declaration of a parameter entity must
7457 * precede any reference to it...
7458 */
7459 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7460 "PEReference: %%%s; not found\n",
7461 name, NULL);
7462 ctxt->valid = 0;
7463 }
7464 } else {
7465 /*
7466 * Internal checking in case the entity quest barfed
7467 */
7468 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7469 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7470 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7471 "Internal: %%%s; is not a parameter entity\n",
7472 name, NULL);
7473 } else if (ctxt->input->free != deallocblankswrapper) {
7474 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7475 if (xmlPushInput(ctxt, input) < 0)
7476 return;
7477 } else {
7478 /*
7479 * TODO !!!
7480 * handle the extra spaces added before and after
7481 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7482 */
7483 input = xmlNewEntityInputStream(ctxt, entity);
7484 if (xmlPushInput(ctxt, input) < 0)
7485 return;
7486 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7487 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7488 (IS_BLANK_CH(NXT(5)))) {
7489 xmlParseTextDecl(ctxt);
7490 if (ctxt->errNo ==
7491 XML_ERR_UNSUPPORTED_ENCODING) {
7492 /*
7493 * The XML REC instructs us to stop parsing
7494 * right here
7495 */
7496 ctxt->instate = XML_PARSER_EOF;
7497 return;
7498 }
7499 }
7500 }
7501 }
7502 ctxt->hasPErefs = 1;
7503 }
7504
7505 /**
7506 * xmlLoadEntityContent:
7507 * @ctxt: an XML parser context
7508 * @entity: an unloaded system entity
7509 *
7510 * Load the original content of the given system entity from the
7511 * ExternalID/SystemID given. This is to be used for Included in Literal
7512 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7513 *
7514 * Returns 0 in case of success and -1 in case of failure
7515 */
7516 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7517 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7518 xmlParserInputPtr input;
7519 xmlBufferPtr buf;
7520 int l, c;
7521 int count = 0;
7522
7523 if ((ctxt == NULL) || (entity == NULL) ||
7524 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7525 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7526 (entity->content != NULL)) {
7527 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7528 "xmlLoadEntityContent parameter error");
7529 return(-1);
7530 }
7531
7532 if (xmlParserDebugEntities)
7533 xmlGenericError(xmlGenericErrorContext,
7534 "Reading %s entity content input\n", entity->name);
7535
7536 buf = xmlBufferCreate();
7537 if (buf == NULL) {
7538 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7539 "xmlLoadEntityContent parameter error");
7540 return(-1);
7541 }
7542
7543 input = xmlNewEntityInputStream(ctxt, entity);
7544 if (input == NULL) {
7545 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7546 "xmlLoadEntityContent input error");
7547 xmlBufferFree(buf);
7548 return(-1);
7549 }
7550
7551 /*
7552 * Push the entity as the current input, read char by char
7553 * saving to the buffer until the end of the entity or an error
7554 */
7555 if (xmlPushInput(ctxt, input) < 0) {
7556 xmlBufferFree(buf);
7557 return(-1);
7558 }
7559
7560 GROW;
7561 c = CUR_CHAR(l);
7562 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7563 (IS_CHAR(c))) {
7564 xmlBufferAdd(buf, ctxt->input->cur, l);
7565 if (count++ > 100) {
7566 count = 0;
7567 GROW;
7568 }
7569 NEXTL(l);
7570 c = CUR_CHAR(l);
7571 }
7572
7573 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7574 xmlPopInput(ctxt);
7575 } else if (!IS_CHAR(c)) {
7576 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7577 "xmlLoadEntityContent: invalid char value %d\n",
7578 c);
7579 xmlBufferFree(buf);
7580 return(-1);
7581 }
7582 entity->content = buf->content;
7583 buf->content = NULL;
7584 xmlBufferFree(buf);
7585
7586 return(0);
7587 }
7588
7589 /**
7590 * xmlParseStringPEReference:
7591 * @ctxt: an XML parser context
7592 * @str: a pointer to an index in the string
7593 *
7594 * parse PEReference declarations
7595 *
7596 * [69] PEReference ::= '%' Name ';'
7597 *
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive
7600 * reference to itself, either directly or indirectly.
7601 *
7602 * [ WFC: Entity Declared ]
7603 * In a document without any DTD, a document with only an internal DTD
7604 * subset which contains no parameter entity references, or a document
7605 * with "standalone='yes'", ... ... The declaration of a parameter
7606 * entity must precede any reference to it...
7607 *
7608 * [ VC: Entity Declared ]
7609 * In a document with an external subset or external parameter entities
7610 * with "standalone='no'", ... ... The declaration of a parameter entity
7611 * must precede any reference to it...
7612 *
7613 * [ WFC: In DTD ]
7614 * Parameter-entity references may only appear in the DTD.
7615 * NOTE: misleading but this is handled.
7616 *
7617 * Returns the string of the entity content.
7618 * str is updated to the current value of the index
7619 */
7620 xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7621 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7622 const xmlChar *ptr;
7623 xmlChar cur;
7624 xmlChar *name;
7625 xmlEntityPtr entity = NULL;
7626
7627 if ((str == NULL) || (*str == NULL)) return(NULL);
7628 ptr = *str;
7629 cur = *ptr;
7630 if (cur != '%')
7631 return(NULL);
7632 ptr++;
7633 cur = *ptr;
7634 name = xmlParseStringName(ctxt, &ptr);
7635 if (name == NULL) {
7636 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7637 "xmlParseStringPEReference: no name\n");
7638 *str = ptr;
7639 return(NULL);
7640 }
7641 cur = *ptr;
7642 if (cur != ';') {
7643 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7644 xmlFree(name);
7645 *str = ptr;
7646 return(NULL);
7647 }
7648 ptr++;
7649
7650 /*
7651 * Increate the number of entity references parsed
7652 */
7653 ctxt->nbentities++;
7654
7655 /*
7656 * Request the entity from SAX
7657 */
7658 if ((ctxt->sax != NULL) &&
7659 (ctxt->sax->getParameterEntity != NULL))
7660 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7661 name);
7662 if (entity == NULL) {
7663 /*
7664 * [ WFC: Entity Declared ]
7665 * In a document without any DTD, a document with only an
7666 * internal DTD subset which contains no parameter entity
7667 * references, or a document with "standalone='yes'", ...
7668 * ... The declaration of a parameter entity must precede
7669 * any reference to it...
7670 */
7671 if ((ctxt->standalone == 1) ||
7672 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7673 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7674 "PEReference: %%%s; not found\n", name);
7675 } else {
7676 /*
7677 * [ VC: Entity Declared ]
7678 * In a document with an external subset or external
7679 * parameter entities with "standalone='no'", ...
7680 * ... The declaration of a parameter entity must
7681 * precede any reference to it...
7682 */
7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 "PEReference: %%%s; not found\n",
7685 name, NULL);
7686 ctxt->valid = 0;
7687 }
7688 } else {
7689 /*
7690 * Internal checking in case the entity quest barfed
7691 */
7692 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7693 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 "%%%s; is not a parameter entity\n",
7696 name, NULL);
7697 }
7698 }
7699 ctxt->hasPErefs = 1;
7700 xmlFree(name);
7701 *str = ptr;
7702 return(entity);
7703 }
7704
7705 /**
7706 * xmlParseDocTypeDecl:
7707 * @ctxt: an XML parser context
7708 *
7709 * parse a DOCTYPE declaration
7710 *
7711 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7712 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7713 *
7714 * [ VC: Root Element Type ]
7715 * The Name in the document type declaration must match the element
7716 * type of the root element.
7717 */
7718
7719 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7720 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7721 const xmlChar *name = NULL;
7722 xmlChar *ExternalID = NULL;
7723 xmlChar *URI = NULL;
7724
7725 /*
7726 * We know that '<!DOCTYPE' has been detected.
7727 */
7728 SKIP(9);
7729
7730 SKIP_BLANKS;
7731
7732 /*
7733 * Parse the DOCTYPE name.
7734 */
7735 name = xmlParseName(ctxt);
7736 if (name == NULL) {
7737 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7738 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7739 }
7740 ctxt->intSubName = name;
7741
7742 SKIP_BLANKS;
7743
7744 /*
7745 * Check for SystemID and ExternalID
7746 */
7747 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7748
7749 if ((URI != NULL) || (ExternalID != NULL)) {
7750 ctxt->hasExternalSubset = 1;
7751 }
7752 ctxt->extSubURI = URI;
7753 ctxt->extSubSystem = ExternalID;
7754
7755 SKIP_BLANKS;
7756
7757 /*
7758 * Create and update the internal subset.
7759 */
7760 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7761 (!ctxt->disableSAX))
7762 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7763
7764 /*
7765 * Is there any internal subset declarations ?
7766 * they are handled separately in xmlParseInternalSubset()
7767 */
7768 if (RAW == '[')
7769 return;
7770
7771 /*
7772 * We should be at the end of the DOCTYPE declaration.
7773 */
7774 if (RAW != '>') {
7775 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7776 }
7777 NEXT;
7778 }
7779
7780 /**
7781 * xmlParseInternalSubset:
7782 * @ctxt: an XML parser context
7783 *
7784 * parse the internal subset declaration
7785 *
7786 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7787 */
7788
7789 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)7790 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7791 /*
7792 * Is there any DTD definition ?
7793 */
7794 if (RAW == '[') {
7795 ctxt->instate = XML_PARSER_DTD;
7796 NEXT;
7797 /*
7798 * Parse the succession of Markup declarations and
7799 * PEReferences.
7800 * Subsequence (markupdecl | PEReference | S)*
7801 */
7802 while (RAW != ']') {
7803 const xmlChar *check = CUR_PTR;
7804 unsigned int cons = ctxt->input->consumed;
7805
7806 SKIP_BLANKS;
7807 xmlParseMarkupDecl(ctxt);
7808 xmlParsePEReference(ctxt);
7809
7810 /*
7811 * Pop-up of finished entities.
7812 */
7813 while ((RAW == 0) && (ctxt->inputNr > 1))
7814 xmlPopInput(ctxt);
7815
7816 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7817 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7818 "xmlParseInternalSubset: error detected in Markup declaration\n");
7819 break;
7820 }
7821 }
7822 if (RAW == ']') {
7823 NEXT;
7824 SKIP_BLANKS;
7825 }
7826 }
7827
7828 /*
7829 * We should be at the end of the DOCTYPE declaration.
7830 */
7831 if (RAW != '>') {
7832 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7833 }
7834 NEXT;
7835 }
7836
7837 #ifdef LIBXML_SAX1_ENABLED
7838 /**
7839 * xmlParseAttribute:
7840 * @ctxt: an XML parser context
7841 * @value: a xmlChar ** used to store the value of the attribute
7842 *
7843 * parse an attribute
7844 *
7845 * [41] Attribute ::= Name Eq AttValue
7846 *
7847 * [ WFC: No External Entity References ]
7848 * Attribute values cannot contain direct or indirect entity references
7849 * to external entities.
7850 *
7851 * [ WFC: No < in Attribute Values ]
7852 * The replacement text of any entity referred to directly or indirectly in
7853 * an attribute value (other than "<") must not contain a <.
7854 *
7855 * [ VC: Attribute Value Type ]
7856 * The attribute must have been declared; the value must be of the type
7857 * declared for it.
7858 *
7859 * [25] Eq ::= S? '=' S?
7860 *
7861 * With namespace:
7862 *
7863 * [NS 11] Attribute ::= QName Eq AttValue
7864 *
7865 * Also the case QName == xmlns:??? is handled independently as a namespace
7866 * definition.
7867 *
7868 * Returns the attribute name, and the value in *value.
7869 */
7870
7871 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)7872 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7873 const xmlChar *name;
7874 xmlChar *val;
7875
7876 *value = NULL;
7877 GROW;
7878 name = xmlParseName(ctxt);
7879 if (name == NULL) {
7880 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7881 "error parsing attribute name\n");
7882 return(NULL);
7883 }
7884
7885 /*
7886 * read the value
7887 */
7888 SKIP_BLANKS;
7889 if (RAW == '=') {
7890 NEXT;
7891 SKIP_BLANKS;
7892 val = xmlParseAttValue(ctxt);
7893 ctxt->instate = XML_PARSER_CONTENT;
7894 } else {
7895 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7896 "Specification mandate value for attribute %s\n", name);
7897 return(NULL);
7898 }
7899
7900 /*
7901 * Check that xml:lang conforms to the specification
7902 * No more registered as an error, just generate a warning now
7903 * since this was deprecated in XML second edition
7904 */
7905 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7906 if (!xmlCheckLanguageID(val)) {
7907 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7908 "Malformed value for xml:lang : %s\n",
7909 val, NULL);
7910 }
7911 }
7912
7913 /*
7914 * Check that xml:space conforms to the specification
7915 */
7916 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7917 if (xmlStrEqual(val, BAD_CAST "default"))
7918 *(ctxt->space) = 0;
7919 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7920 *(ctxt->space) = 1;
7921 else {
7922 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7923 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7924 val, NULL);
7925 }
7926 }
7927
7928 *value = val;
7929 return(name);
7930 }
7931
7932 /**
7933 * xmlParseStartTag:
7934 * @ctxt: an XML parser context
7935 *
7936 * parse a start of tag either for rule element or
7937 * EmptyElement. In both case we don't parse the tag closing chars.
7938 *
7939 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7940 *
7941 * [ WFC: Unique Att Spec ]
7942 * No attribute name may appear more than once in the same start-tag or
7943 * empty-element tag.
7944 *
7945 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7946 *
7947 * [ WFC: Unique Att Spec ]
7948 * No attribute name may appear more than once in the same start-tag or
7949 * empty-element tag.
7950 *
7951 * With namespace:
7952 *
7953 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7954 *
7955 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7956 *
7957 * Returns the element name parsed
7958 */
7959
7960 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)7961 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7962 const xmlChar *name;
7963 const xmlChar *attname;
7964 xmlChar *attvalue;
7965 const xmlChar **atts = ctxt->atts;
7966 int nbatts = 0;
7967 int maxatts = ctxt->maxatts;
7968 int i;
7969
7970 if (RAW != '<') return(NULL);
7971 NEXT1;
7972
7973 name = xmlParseName(ctxt);
7974 if (name == NULL) {
7975 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7976 "xmlParseStartTag: invalid element name\n");
7977 return(NULL);
7978 }
7979
7980 /*
7981 * Now parse the attributes, it ends up with the ending
7982 *
7983 * (S Attribute)* S?
7984 */
7985 SKIP_BLANKS;
7986 GROW;
7987
7988 while ((RAW != '>') &&
7989 ((RAW != '/') || (NXT(1) != '>')) &&
7990 (IS_BYTE_CHAR(RAW))) {
7991 const xmlChar *q = CUR_PTR;
7992 unsigned int cons = ctxt->input->consumed;
7993
7994 attname = xmlParseAttribute(ctxt, &attvalue);
7995 if ((attname != NULL) && (attvalue != NULL)) {
7996 /*
7997 * [ WFC: Unique Att Spec ]
7998 * No attribute name may appear more than once in the same
7999 * start-tag or empty-element tag.
8000 */
8001 for (i = 0; i < nbatts;i += 2) {
8002 if (xmlStrEqual(atts[i], attname)) {
8003 xmlErrAttributeDup(ctxt, NULL, attname);
8004 xmlFree(attvalue);
8005 goto failed;
8006 }
8007 }
8008 /*
8009 * Add the pair to atts
8010 */
8011 if (atts == NULL) {
8012 maxatts = 22; /* allow for 10 attrs by default */
8013 atts = (const xmlChar **)
8014 xmlMalloc(maxatts * sizeof(xmlChar *));
8015 if (atts == NULL) {
8016 xmlErrMemory(ctxt, NULL);
8017 if (attvalue != NULL)
8018 xmlFree(attvalue);
8019 goto failed;
8020 }
8021 ctxt->atts = atts;
8022 ctxt->maxatts = maxatts;
8023 } else if (nbatts + 4 > maxatts) {
8024 const xmlChar **n;
8025
8026 maxatts *= 2;
8027 n = (const xmlChar **) xmlRealloc((void *) atts,
8028 maxatts * sizeof(const xmlChar *));
8029 if (n == NULL) {
8030 xmlErrMemory(ctxt, NULL);
8031 if (attvalue != NULL)
8032 xmlFree(attvalue);
8033 goto failed;
8034 }
8035 atts = n;
8036 ctxt->atts = atts;
8037 ctxt->maxatts = maxatts;
8038 }
8039 atts[nbatts++] = attname;
8040 atts[nbatts++] = attvalue;
8041 atts[nbatts] = NULL;
8042 atts[nbatts + 1] = NULL;
8043 } else {
8044 if (attvalue != NULL)
8045 xmlFree(attvalue);
8046 }
8047
8048 failed:
8049
8050 GROW
8051 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8052 break;
8053 if (!IS_BLANK_CH(RAW)) {
8054 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8055 "attributes construct error\n");
8056 }
8057 SKIP_BLANKS;
8058 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8059 (attname == NULL) && (attvalue == NULL)) {
8060 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8061 "xmlParseStartTag: problem parsing attributes\n");
8062 break;
8063 }
8064 SHRINK;
8065 GROW;
8066 }
8067
8068 /*
8069 * SAX: Start of Element !
8070 */
8071 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8072 (!ctxt->disableSAX)) {
8073 if (nbatts > 0)
8074 ctxt->sax->startElement(ctxt->userData, name, atts);
8075 else
8076 ctxt->sax->startElement(ctxt->userData, name, NULL);
8077 }
8078
8079 if (atts != NULL) {
8080 /* Free only the content strings */
8081 for (i = 1;i < nbatts;i+=2)
8082 if (atts[i] != NULL)
8083 xmlFree((xmlChar *) atts[i]);
8084 }
8085 return(name);
8086 }
8087
8088 /**
8089 * xmlParseEndTag1:
8090 * @ctxt: an XML parser context
8091 * @line: line of the start tag
8092 * @nsNr: number of namespaces on the start tag
8093 *
8094 * parse an end of tag
8095 *
8096 * [42] ETag ::= '</' Name S? '>'
8097 *
8098 * With namespace
8099 *
8100 * [NS 9] ETag ::= '</' QName S? '>'
8101 */
8102
8103 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8104 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8105 const xmlChar *name;
8106
8107 GROW;
8108 if ((RAW != '<') || (NXT(1) != '/')) {
8109 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8110 "xmlParseEndTag: '</' not found\n");
8111 return;
8112 }
8113 SKIP(2);
8114
8115 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8116
8117 /*
8118 * We should definitely be at the ending "S? '>'" part
8119 */
8120 GROW;
8121 SKIP_BLANKS;
8122 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8123 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8124 } else
8125 NEXT1;
8126
8127 /*
8128 * [ WFC: Element Type Match ]
8129 * The Name in an element's end-tag must match the element type in the
8130 * start-tag.
8131 *
8132 */
8133 if (name != (xmlChar*)1) {
8134 if (name == NULL) name = BAD_CAST "unparseable";
8135 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8136 "Opening and ending tag mismatch: %s line %d and %s\n",
8137 ctxt->name, line, name);
8138 }
8139
8140 /*
8141 * SAX: End of Tag
8142 */
8143 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8144 (!ctxt->disableSAX))
8145 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8146
8147 namePop(ctxt);
8148 spacePop(ctxt);
8149 return;
8150 }
8151
8152 /**
8153 * xmlParseEndTag:
8154 * @ctxt: an XML parser context
8155 *
8156 * parse an end of tag
8157 *
8158 * [42] ETag ::= '</' Name S? '>'
8159 *
8160 * With namespace
8161 *
8162 * [NS 9] ETag ::= '</' QName S? '>'
8163 */
8164
8165 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8166 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8167 xmlParseEndTag1(ctxt, 0);
8168 }
8169 #endif /* LIBXML_SAX1_ENABLED */
8170
8171 /************************************************************************
8172 * *
8173 * SAX 2 specific operations *
8174 * *
8175 ************************************************************************/
8176
8177 /*
8178 * xmlGetNamespace:
8179 * @ctxt: an XML parser context
8180 * @prefix: the prefix to lookup
8181 *
8182 * Lookup the namespace name for the @prefix (which ca be NULL)
8183 * The prefix must come from the @ctxt->dict dictionnary
8184 *
8185 * Returns the namespace name or NULL if not bound
8186 */
8187 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8188 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8189 int i;
8190
8191 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8192 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8193 if (ctxt->nsTab[i] == prefix) {
8194 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8195 return(NULL);
8196 return(ctxt->nsTab[i + 1]);
8197 }
8198 if (ctxt->nsParent) return xmlGetNamespace(ctxt->nsParent, prefix);
8199 return(NULL);
8200 }
8201
8202 /**
8203 * xmlParseQName:
8204 * @ctxt: an XML parser context
8205 * @prefix: pointer to store the prefix part
8206 *
8207 * parse an XML Namespace QName
8208 *
8209 * [6] QName ::= (Prefix ':')? LocalPart
8210 * [7] Prefix ::= NCName
8211 * [8] LocalPart ::= NCName
8212 *
8213 * Returns the Name parsed or NULL
8214 */
8215
8216 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8217 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8218 const xmlChar *l, *p;
8219
8220 GROW;
8221
8222 l = xmlParseNCName(ctxt);
8223 if (l == NULL) {
8224 if (CUR == ':') {
8225 l = xmlParseName(ctxt);
8226 if (l != NULL) {
8227 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8228 "Failed to parse QName '%s'\n", l, NULL, NULL);
8229 *prefix = NULL;
8230 return(l);
8231 }
8232 }
8233 return(NULL);
8234 }
8235 if (CUR == ':') {
8236 NEXT;
8237 p = l;
8238 l = xmlParseNCName(ctxt);
8239 if (l == NULL) {
8240 xmlChar *tmp;
8241
8242 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8243 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8244 l = xmlParseNmtoken(ctxt);
8245 if (l == NULL)
8246 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8247 else {
8248 tmp = xmlBuildQName(l, p, NULL, 0);
8249 xmlFree((char *)l);
8250 }
8251 p = xmlDictLookup(ctxt->dict, tmp, -1);
8252 if (tmp != NULL) xmlFree(tmp);
8253 *prefix = NULL;
8254 return(p);
8255 }
8256 if (CUR == ':') {
8257 xmlChar *tmp;
8258
8259 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8260 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8261 NEXT;
8262 tmp = (xmlChar *) xmlParseName(ctxt);
8263 if (tmp != NULL) {
8264 tmp = xmlBuildQName(tmp, l, NULL, 0);
8265 l = xmlDictLookup(ctxt->dict, tmp, -1);
8266 if (tmp != NULL) xmlFree(tmp);
8267 *prefix = p;
8268 return(l);
8269 }
8270 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8271 l = xmlDictLookup(ctxt->dict, tmp, -1);
8272 if (tmp != NULL) xmlFree(tmp);
8273 *prefix = p;
8274 return(l);
8275 }
8276 *prefix = p;
8277 } else
8278 *prefix = NULL;
8279 return(l);
8280 }
8281
8282 /**
8283 * xmlParseQNameAndCompare:
8284 * @ctxt: an XML parser context
8285 * @name: the localname
8286 * @prefix: the prefix, if any.
8287 *
8288 * parse an XML name and compares for match
8289 * (specialized for endtag parsing)
8290 *
8291 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8292 * and the name for mismatch
8293 */
8294
8295 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8296 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8297 xmlChar const *prefix) {
8298 const xmlChar *cmp = name;
8299 const xmlChar *in;
8300 const xmlChar *ret;
8301 const xmlChar *prefix2;
8302
8303 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8304
8305 GROW;
8306 in = ctxt->input->cur;
8307
8308 cmp = prefix;
8309 while (*in != 0 && *in == *cmp) {
8310 ++in;
8311 ++cmp;
8312 }
8313 if ((*cmp == 0) && (*in == ':')) {
8314 in++;
8315 cmp = name;
8316 while (*in != 0 && *in == *cmp) {
8317 ++in;
8318 ++cmp;
8319 }
8320 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8321 /* success */
8322 ctxt->input->cur = in;
8323 return((const xmlChar*) 1);
8324 }
8325 }
8326 /*
8327 * all strings coms from the dictionary, equality can be done directly
8328 */
8329 ret = xmlParseQName (ctxt, &prefix2);
8330 if ((ret == name) && (prefix == prefix2))
8331 return((const xmlChar*) 1);
8332 return ret;
8333 }
8334
8335 /**
8336 * xmlParseAttValueInternal:
8337 * @ctxt: an XML parser context
8338 * @len: attribute len result
8339 * @alloc: whether the attribute was reallocated as a new string
8340 * @normalize: if 1 then further non-CDATA normalization must be done
8341 *
8342 * parse a value for an attribute.
8343 * NOTE: if no normalization is needed, the routine will return pointers
8344 * directly from the data buffer.
8345 *
8346 * 3.3.3 Attribute-Value Normalization:
8347 * Before the value of an attribute is passed to the application or
8348 * checked for validity, the XML processor must normalize it as follows:
8349 * - a character reference is processed by appending the referenced
8350 * character to the attribute value
8351 * - an entity reference is processed by recursively processing the
8352 * replacement text of the entity
8353 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8354 * appending #x20 to the normalized value, except that only a single
8355 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8356 * parsed entity or the literal entity value of an internal parsed entity
8357 * - other characters are processed by appending them to the normalized value
8358 * If the declared value is not CDATA, then the XML processor must further
8359 * process the normalized attribute value by discarding any leading and
8360 * trailing space (#x20) characters, and by replacing sequences of space
8361 * (#x20) characters by a single space (#x20) character.
8362 * All attributes for which no declaration has been read should be treated
8363 * by a non-validating parser as if declared CDATA.
8364 *
8365 * Returns the AttValue parsed or NULL. The value has to be freed by the
8366 * caller if it was copied, this can be detected by val[*len] == 0.
8367 */
8368
8369 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8370 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8371 int normalize)
8372 {
8373 xmlChar limit = 0;
8374 const xmlChar *in = NULL, *start, *end, *last;
8375 xmlChar *ret = NULL;
8376
8377 GROW;
8378 in = (xmlChar *) CUR_PTR;
8379 if (*in != '"' && *in != '\'') {
8380 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8381 return (NULL);
8382 }
8383 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8384
8385 /*
8386 * try to handle in this routine the most common case where no
8387 * allocation of a new string is required and where content is
8388 * pure ASCII.
8389 */
8390 limit = *in++;
8391 end = ctxt->input->end;
8392 start = in;
8393 if (in >= end) {
8394 const xmlChar *oldbase = ctxt->input->base;
8395 GROW;
8396 if (oldbase != ctxt->input->base) {
8397 long delta = ctxt->input->base - oldbase;
8398 start = start + delta;
8399 in = in + delta;
8400 }
8401 end = ctxt->input->end;
8402 }
8403 if (normalize) {
8404 /*
8405 * Skip any leading spaces
8406 */
8407 while ((in < end) && (*in != limit) &&
8408 ((*in == 0x20) || (*in == 0x9) ||
8409 (*in == 0xA) || (*in == 0xD))) {
8410 in++;
8411 start = in;
8412 if (in >= end) {
8413 const xmlChar *oldbase = ctxt->input->base;
8414 GROW;
8415 if (oldbase != ctxt->input->base) {
8416 long delta = ctxt->input->base - oldbase;
8417 start = start + delta;
8418 in = in + delta;
8419 }
8420 end = ctxt->input->end;
8421 }
8422 }
8423 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8424 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8425 if ((*in++ == 0x20) && (*in == 0x20)) break;
8426 if (in >= end) {
8427 const xmlChar *oldbase = ctxt->input->base;
8428 GROW;
8429 if (oldbase != ctxt->input->base) {
8430 long delta = ctxt->input->base - oldbase;
8431 start = start + delta;
8432 in = in + delta;
8433 }
8434 end = ctxt->input->end;
8435 }
8436 }
8437 last = in;
8438 /*
8439 * skip the trailing blanks
8440 */
8441 while ((last[-1] == 0x20) && (last > start)) last--;
8442 while ((in < end) && (*in != limit) &&
8443 ((*in == 0x20) || (*in == 0x9) ||
8444 (*in == 0xA) || (*in == 0xD))) {
8445 in++;
8446 if (in >= end) {
8447 const xmlChar *oldbase = ctxt->input->base;
8448 GROW;
8449 if (oldbase != ctxt->input->base) {
8450 long delta = ctxt->input->base - oldbase;
8451 start = start + delta;
8452 in = in + delta;
8453 last = last + delta;
8454 }
8455 end = ctxt->input->end;
8456 }
8457 }
8458 if (*in != limit) goto need_complex;
8459 } else {
8460 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8461 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8462 in++;
8463 if (in >= end) {
8464 const xmlChar *oldbase = ctxt->input->base;
8465 GROW;
8466 if (oldbase != ctxt->input->base) {
8467 long delta = ctxt->input->base - oldbase;
8468 start = start + delta;
8469 in = in + delta;
8470 }
8471 end = ctxt->input->end;
8472 }
8473 }
8474 last = in;
8475 if (*in != limit) goto need_complex;
8476 }
8477 in++;
8478 if (len != NULL) {
8479 *len = last - start;
8480 ret = (xmlChar *) start;
8481 } else {
8482 if (alloc) *alloc = 1;
8483 ret = xmlStrndup(start, last - start);
8484 }
8485 CUR_PTR = in;
8486 if (alloc) *alloc = 0;
8487 return ret;
8488 need_complex:
8489 if (alloc) *alloc = 1;
8490 return xmlParseAttValueComplex(ctxt, len, normalize);
8491 }
8492
8493 /**
8494 * xmlParseAttribute2:
8495 * @ctxt: an XML parser context
8496 * @pref: the element prefix
8497 * @elem: the element name
8498 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8499 * @value: a xmlChar ** used to store the value of the attribute
8500 * @len: an int * to save the length of the attribute
8501 * @alloc: an int * to indicate if the attribute was allocated
8502 *
8503 * parse an attribute in the new SAX2 framework.
8504 *
8505 * Returns the attribute name, and the value in *value, .
8506 */
8507
8508 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8509 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8510 const xmlChar * pref, const xmlChar * elem,
8511 const xmlChar ** prefix, xmlChar ** value,
8512 int *len, int *alloc)
8513 {
8514 const xmlChar *name;
8515 xmlChar *val, *internal_val = NULL;
8516 int normalize = 0;
8517
8518 *value = NULL;
8519 GROW;
8520 name = xmlParseQName(ctxt, prefix);
8521 if (name == NULL) {
8522 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8523 "error parsing attribute name\n");
8524 return (NULL);
8525 }
8526
8527 /*
8528 * get the type if needed
8529 */
8530 if (ctxt->attsSpecial != NULL) {
8531 int type;
8532
8533 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8534 pref, elem, *prefix, name);
8535 if (type != 0)
8536 normalize = 1;
8537 }
8538
8539 /*
8540 * read the value
8541 */
8542 SKIP_BLANKS;
8543 if (RAW == '=') {
8544 NEXT;
8545 SKIP_BLANKS;
8546 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8547 if (normalize) {
8548 /*
8549 * Sometimes a second normalisation pass for spaces is needed
8550 * but that only happens if charrefs or entities refernces
8551 * have been used in the attribute value, i.e. the attribute
8552 * value have been extracted in an allocated string already.
8553 */
8554 if (*alloc) {
8555 const xmlChar *val2;
8556
8557 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8558 if ((val2 != NULL) && (val2 != val)) {
8559 xmlFree(val);
8560 val = (xmlChar *) val2;
8561 }
8562 }
8563 }
8564 ctxt->instate = XML_PARSER_CONTENT;
8565 } else {
8566 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8567 "Specification mandate value for attribute %s\n",
8568 name);
8569 return (NULL);
8570 }
8571
8572 if (*prefix == ctxt->str_xml) {
8573 /*
8574 * Check that xml:lang conforms to the specification
8575 * No more registered as an error, just generate a warning now
8576 * since this was deprecated in XML second edition
8577 */
8578 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8579 internal_val = xmlStrndup(val, *len);
8580 if (!xmlCheckLanguageID(internal_val)) {
8581 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8582 "Malformed value for xml:lang : %s\n",
8583 internal_val, NULL);
8584 }
8585 }
8586
8587 /*
8588 * Check that xml:space conforms to the specification
8589 */
8590 if (xmlStrEqual(name, BAD_CAST "space")) {
8591 internal_val = xmlStrndup(val, *len);
8592 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8593 *(ctxt->space) = 0;
8594 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8595 *(ctxt->space) = 1;
8596 else {
8597 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8598 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8599 internal_val, NULL);
8600 }
8601 }
8602 if (internal_val) {
8603 xmlFree(internal_val);
8604 }
8605 }
8606
8607 *value = val;
8608 return (name);
8609 }
8610 /**
8611 * xmlParseStartTag2:
8612 * @ctxt: an XML parser context
8613 *
8614 * parse a start of tag either for rule element or
8615 * EmptyElement. In both case we don't parse the tag closing chars.
8616 * This routine is called when running SAX2 parsing
8617 *
8618 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8619 *
8620 * [ WFC: Unique Att Spec ]
8621 * No attribute name may appear more than once in the same start-tag or
8622 * empty-element tag.
8623 *
8624 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8625 *
8626 * [ WFC: Unique Att Spec ]
8627 * No attribute name may appear more than once in the same start-tag or
8628 * empty-element tag.
8629 *
8630 * With namespace:
8631 *
8632 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8633 *
8634 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8635 *
8636 * Returns the element name parsed
8637 */
8638
8639 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8640 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8641 const xmlChar **URI, int *tlen) {
8642 const xmlChar *localname;
8643 const xmlChar *prefix;
8644 const xmlChar *attname;
8645 const xmlChar *aprefix;
8646 const xmlChar *nsname;
8647 xmlChar *attvalue;
8648 const xmlChar **atts = ctxt->atts;
8649 int maxatts = ctxt->maxatts;
8650 int nratts, nbatts, nbdef;
8651 int i, j, nbNs, attval, oldline, oldcol;
8652 const xmlChar *base;
8653 unsigned long cur;
8654 int nsNr = ctxt->nsNr;
8655
8656 if (RAW != '<') return(NULL);
8657 NEXT1;
8658
8659 /*
8660 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8661 * point since the attribute values may be stored as pointers to
8662 * the buffer and calling SHRINK would destroy them !
8663 * The Shrinking is only possible once the full set of attribute
8664 * callbacks have been done.
8665 */
8666 reparse:
8667 SHRINK;
8668 base = ctxt->input->base;
8669 cur = ctxt->input->cur - ctxt->input->base;
8670 oldline = ctxt->input->line;
8671 oldcol = ctxt->input->col;
8672 nbatts = 0;
8673 nratts = 0;
8674 nbdef = 0;
8675 nbNs = 0;
8676 attval = 0;
8677 /* Forget any namespaces added during an earlier parse of this element. */
8678 ctxt->nsNr = nsNr;
8679
8680 localname = xmlParseQName(ctxt, &prefix);
8681 if (localname == NULL) {
8682 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8683 "StartTag: invalid element name\n");
8684 return(NULL);
8685 }
8686 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8687
8688 /*
8689 * Now parse the attributes, it ends up with the ending
8690 *
8691 * (S Attribute)* S?
8692 */
8693 SKIP_BLANKS;
8694 GROW;
8695 if (ctxt->input->base != base) goto base_changed;
8696
8697 while ((RAW != '>') &&
8698 ((RAW != '/') || (NXT(1) != '>')) &&
8699 (IS_BYTE_CHAR(RAW))) {
8700 const xmlChar *q = CUR_PTR;
8701 unsigned int cons = ctxt->input->consumed;
8702 int len = -1, alloc = 0;
8703
8704 attname = xmlParseAttribute2(ctxt, prefix, localname,
8705 &aprefix, &attvalue, &len, &alloc);
8706 if (ctxt->input->base != base) {
8707 if ((attvalue != NULL) && (alloc != 0))
8708 xmlFree(attvalue);
8709 attvalue = NULL;
8710 goto base_changed;
8711 }
8712 if ((attname != NULL) && (attvalue != NULL)) {
8713 if (len < 0) len = xmlStrlen(attvalue);
8714 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8715 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8716 xmlURIPtr uri;
8717
8718 if (*URL != 0) {
8719 uri = xmlParseURI((const char *) URL);
8720 if (uri == NULL) {
8721 xmlNsErr(ctxt, XML_WAR_NS_URI,
8722 "xmlns: '%s' is not a valid URI\n",
8723 URL, NULL, NULL);
8724 } else {
8725 if (uri->scheme == NULL) {
8726 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8727 "xmlns: URI %s is not absolute\n",
8728 URL, NULL, NULL);
8729 }
8730 xmlFreeURI(uri);
8731 }
8732 if (URL == ctxt->str_xml_ns) {
8733 if (attname != ctxt->str_xml) {
8734 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8735 "xml namespace URI cannot be the default namespace\n",
8736 NULL, NULL, NULL);
8737 }
8738 goto skip_default_ns;
8739 }
8740 if ((len == 29) &&
8741 (xmlStrEqual(URL,
8742 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8743 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8744 "reuse of the xmlns namespace name is forbidden\n",
8745 NULL, NULL, NULL);
8746 goto skip_default_ns;
8747 }
8748 }
8749 /*
8750 * check that it's not a defined namespace
8751 */
8752 for (j = 1;j <= nbNs;j++)
8753 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8754 break;
8755 if (j <= nbNs)
8756 xmlErrAttributeDup(ctxt, NULL, attname);
8757 else
8758 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8759 skip_default_ns:
8760 if (alloc != 0) xmlFree(attvalue);
8761 SKIP_BLANKS;
8762 continue;
8763 }
8764 if (aprefix == ctxt->str_xmlns) {
8765 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8766 xmlURIPtr uri;
8767
8768 if (attname == ctxt->str_xml) {
8769 if (URL != ctxt->str_xml_ns) {
8770 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8771 "xml namespace prefix mapped to wrong URI\n",
8772 NULL, NULL, NULL);
8773 }
8774 /*
8775 * Do not keep a namespace definition node
8776 */
8777 goto skip_ns;
8778 }
8779 if (URL == ctxt->str_xml_ns) {
8780 if (attname != ctxt->str_xml) {
8781 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8782 "xml namespace URI mapped to wrong prefix\n",
8783 NULL, NULL, NULL);
8784 }
8785 goto skip_ns;
8786 }
8787 if (attname == ctxt->str_xmlns) {
8788 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8789 "redefinition of the xmlns prefix is forbidden\n",
8790 NULL, NULL, NULL);
8791 goto skip_ns;
8792 }
8793 if ((len == 29) &&
8794 (xmlStrEqual(URL,
8795 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8796 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8797 "reuse of the xmlns namespace name is forbidden\n",
8798 NULL, NULL, NULL);
8799 goto skip_ns;
8800 }
8801 if ((URL == NULL) || (URL[0] == 0)) {
8802 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8803 "xmlns:%s: Empty XML namespace is not allowed\n",
8804 attname, NULL, NULL);
8805 goto skip_ns;
8806 } else {
8807 uri = xmlParseURI((const char *) URL);
8808 if (uri == NULL) {
8809 xmlNsErr(ctxt, XML_WAR_NS_URI,
8810 "xmlns:%s: '%s' is not a valid URI\n",
8811 attname, URL, NULL);
8812 } else {
8813 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8814 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8815 "xmlns:%s: URI %s is not absolute\n",
8816 attname, URL, NULL);
8817 }
8818 xmlFreeURI(uri);
8819 }
8820 }
8821
8822 /*
8823 * check that it's not a defined namespace
8824 */
8825 for (j = 1;j <= nbNs;j++)
8826 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8827 break;
8828 if (j <= nbNs)
8829 xmlErrAttributeDup(ctxt, aprefix, attname);
8830 else
8831 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8832 skip_ns:
8833 if (alloc != 0) xmlFree(attvalue);
8834 SKIP_BLANKS;
8835 if (ctxt->input->base != base) goto base_changed;
8836 continue;
8837 }
8838
8839 /*
8840 * Add the pair to atts
8841 */
8842 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8843 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8844 if (attvalue[len] == 0)
8845 xmlFree(attvalue);
8846 goto failed;
8847 }
8848 maxatts = ctxt->maxatts;
8849 atts = ctxt->atts;
8850 }
8851 ctxt->attallocs[nratts++] = alloc;
8852 atts[nbatts++] = attname;
8853 atts[nbatts++] = aprefix;
8854 atts[nbatts++] = NULL; /* the URI will be fetched later */
8855 atts[nbatts++] = attvalue;
8856 attvalue += len;
8857 atts[nbatts++] = attvalue;
8858 /*
8859 * tag if some deallocation is needed
8860 */
8861 if (alloc != 0) attval = 1;
8862 } else {
8863 if ((attvalue != NULL) && (attvalue[len] == 0))
8864 xmlFree(attvalue);
8865 }
8866
8867 failed:
8868
8869 GROW
8870 if (ctxt->input->base != base) goto base_changed;
8871 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8872 break;
8873 if (!IS_BLANK_CH(RAW)) {
8874 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8875 "attributes construct error\n");
8876 break;
8877 }
8878 SKIP_BLANKS;
8879 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8880 (attname == NULL) && (attvalue == NULL)) {
8881 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8882 "xmlParseStartTag: problem parsing attributes\n");
8883 break;
8884 }
8885 GROW;
8886 if (ctxt->input->base != base) goto base_changed;
8887 }
8888
8889 /*
8890 * The attributes defaulting
8891 */
8892 if (ctxt->attsDefault != NULL) {
8893 xmlDefAttrsPtr defaults;
8894
8895 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8896 if (defaults != NULL) {
8897 for (i = 0;i < defaults->nbAttrs;i++) {
8898 attname = defaults->values[5 * i];
8899 aprefix = defaults->values[5 * i + 1];
8900
8901 /*
8902 * special work for namespaces defaulted defs
8903 */
8904 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8905 /*
8906 * check that it's not a defined namespace
8907 */
8908 for (j = 1;j <= nbNs;j++)
8909 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8910 break;
8911 if (j <= nbNs) continue;
8912
8913 nsname = xmlGetNamespace(ctxt, NULL);
8914 if (nsname != defaults->values[5 * i + 2]) {
8915 if (nsPush(ctxt, NULL,
8916 defaults->values[5 * i + 2]) > 0)
8917 nbNs++;
8918 }
8919 } else if (aprefix == ctxt->str_xmlns) {
8920 /*
8921 * check that it's not a defined namespace
8922 */
8923 for (j = 1;j <= nbNs;j++)
8924 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8925 break;
8926 if (j <= nbNs) continue;
8927
8928 nsname = xmlGetNamespace(ctxt, attname);
8929 if (nsname != defaults->values[2]) {
8930 if (nsPush(ctxt, attname,
8931 defaults->values[5 * i + 2]) > 0)
8932 nbNs++;
8933 }
8934 } else {
8935 /*
8936 * check that it's not a defined attribute
8937 */
8938 for (j = 0;j < nbatts;j+=5) {
8939 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8940 break;
8941 }
8942 if (j < nbatts) continue;
8943
8944 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8945 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8946 return(NULL);
8947 }
8948 maxatts = ctxt->maxatts;
8949 atts = ctxt->atts;
8950 }
8951 atts[nbatts++] = attname;
8952 atts[nbatts++] = aprefix;
8953 if (aprefix == NULL)
8954 atts[nbatts++] = NULL;
8955 else
8956 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8957 atts[nbatts++] = defaults->values[5 * i + 2];
8958 atts[nbatts++] = defaults->values[5 * i + 3];
8959 if ((ctxt->standalone == 1) &&
8960 (defaults->values[5 * i + 4] != NULL)) {
8961 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8962 "standalone: attribute %s on %s defaulted from external subset\n",
8963 attname, localname);
8964 }
8965 nbdef++;
8966 }
8967 }
8968 }
8969 }
8970
8971 /*
8972 * The attributes checkings
8973 */
8974 for (i = 0; i < nbatts;i += 5) {
8975 /*
8976 * The default namespace does not apply to attribute names.
8977 */
8978 if (atts[i + 1] != NULL) {
8979 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8980 if (nsname == NULL) {
8981 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8982 "Namespace prefix %s for %s on %s is not defined\n",
8983 atts[i + 1], atts[i], localname);
8984 }
8985 atts[i + 2] = nsname;
8986 } else
8987 nsname = NULL;
8988 /*
8989 * [ WFC: Unique Att Spec ]
8990 * No attribute name may appear more than once in the same
8991 * start-tag or empty-element tag.
8992 * As extended by the Namespace in XML REC.
8993 */
8994 for (j = 0; j < i;j += 5) {
8995 if (atts[i] == atts[j]) {
8996 if (atts[i+1] == atts[j+1]) {
8997 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8998 break;
8999 }
9000 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9001 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9002 "Namespaced Attribute %s in '%s' redefined\n",
9003 atts[i], nsname, NULL);
9004 break;
9005 }
9006 }
9007 }
9008 }
9009
9010 nsname = xmlGetNamespace(ctxt, prefix);
9011 if ((prefix != NULL) && (nsname == NULL)) {
9012 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9013 "Namespace prefix %s on %s is not defined\n",
9014 prefix, localname, NULL);
9015 }
9016 *pref = prefix;
9017 *URI = nsname;
9018
9019 /*
9020 * SAX: Start of Element !
9021 */
9022 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9023 (!ctxt->disableSAX)) {
9024 if (nbNs > 0)
9025 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9026 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9027 nbatts / 5, nbdef, atts);
9028 else
9029 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9030 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9031 }
9032
9033 /*
9034 * Free up attribute allocated strings if needed
9035 */
9036 if (attval != 0) {
9037 for (i = 3,j = 0; j < nratts;i += 5,j++)
9038 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9039 xmlFree((xmlChar *) atts[i]);
9040 }
9041
9042 return(localname);
9043
9044 base_changed:
9045 /*
9046 * the attribute strings are valid iif the base didn't changed
9047 */
9048 if (attval != 0) {
9049 for (i = 3,j = 0; j < nratts;i += 5,j++)
9050 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9051 xmlFree((xmlChar *) atts[i]);
9052 }
9053 ctxt->input->cur = ctxt->input->base + cur;
9054 ctxt->input->line = oldline;
9055 ctxt->input->col = oldcol;
9056 if (ctxt->wellFormed == 1) {
9057 goto reparse;
9058 }
9059 return(NULL);
9060 }
9061
9062 /**
9063 * xmlParseEndTag2:
9064 * @ctxt: an XML parser context
9065 * @line: line of the start tag
9066 * @nsNr: number of namespaces on the start tag
9067 *
9068 * parse an end of tag
9069 *
9070 * [42] ETag ::= '</' Name S? '>'
9071 *
9072 * With namespace
9073 *
9074 * [NS 9] ETag ::= '</' QName S? '>'
9075 */
9076
9077 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9078 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9079 const xmlChar *URI, int line, int nsNr, int tlen) {
9080 const xmlChar *name;
9081
9082 GROW;
9083 if ((RAW != '<') || (NXT(1) != '/')) {
9084 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9085 return;
9086 }
9087 SKIP(2);
9088
9089 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9090 if (ctxt->input->cur[tlen] == '>') {
9091 ctxt->input->cur += tlen + 1;
9092 goto done;
9093 }
9094 ctxt->input->cur += tlen;
9095 name = (xmlChar*)1;
9096 } else {
9097 if (prefix == NULL)
9098 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9099 else
9100 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9101 }
9102
9103 /*
9104 * We should definitely be at the ending "S? '>'" part
9105 */
9106 GROW;
9107 SKIP_BLANKS;
9108 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9109 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9110 } else
9111 NEXT1;
9112
9113 /*
9114 * [ WFC: Element Type Match ]
9115 * The Name in an element's end-tag must match the element type in the
9116 * start-tag.
9117 *
9118 */
9119 if (name != (xmlChar*)1) {
9120 if (name == NULL) name = BAD_CAST "unparseable";
9121 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9122 "Opening and ending tag mismatch: %s line %d and %s\n",
9123 ctxt->name, line, name);
9124 }
9125
9126 /*
9127 * SAX: End of Tag
9128 */
9129 done:
9130 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9131 (!ctxt->disableSAX))
9132 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9133
9134 spacePop(ctxt);
9135 if (nsNr != 0)
9136 nsPop(ctxt, nsNr);
9137 return;
9138 }
9139
9140 /**
9141 * xmlParseCDSect:
9142 * @ctxt: an XML parser context
9143 *
9144 * Parse escaped pure raw content.
9145 *
9146 * [18] CDSect ::= CDStart CData CDEnd
9147 *
9148 * [19] CDStart ::= '<![CDATA['
9149 *
9150 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9151 *
9152 * [21] CDEnd ::= ']]>'
9153 */
9154 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9155 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9156 xmlChar *buf = NULL;
9157 int len = 0;
9158 int size = XML_PARSER_BUFFER_SIZE;
9159 int r, rl;
9160 int s, sl;
9161 int cur, l;
9162 int count = 0;
9163
9164 /* Check 2.6.0 was NXT(0) not RAW */
9165 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9166 SKIP(9);
9167 } else
9168 return;
9169
9170 ctxt->instate = XML_PARSER_CDATA_SECTION;
9171 r = CUR_CHAR(rl);
9172 if (!IS_CHAR(r)) {
9173 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9174 ctxt->instate = XML_PARSER_CONTENT;
9175 return;
9176 }
9177 NEXTL(rl);
9178 s = CUR_CHAR(sl);
9179 if (!IS_CHAR(s)) {
9180 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9181 ctxt->instate = XML_PARSER_CONTENT;
9182 return;
9183 }
9184 NEXTL(sl);
9185 cur = CUR_CHAR(l);
9186 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9187 if (buf == NULL) {
9188 xmlErrMemory(ctxt, NULL);
9189 return;
9190 }
9191 while (IS_CHAR(cur) &&
9192 ((r != ']') || (s != ']') || (cur != '>'))) {
9193 if (len + 5 >= size) {
9194 xmlChar *tmp;
9195
9196 size *= 2;
9197 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9198 if (tmp == NULL) {
9199 xmlFree(buf);
9200 xmlErrMemory(ctxt, NULL);
9201 return;
9202 }
9203 buf = tmp;
9204 }
9205 COPY_BUF(rl,buf,len,r);
9206 r = s;
9207 rl = sl;
9208 s = cur;
9209 sl = l;
9210 count++;
9211 if (count > 50) {
9212 GROW;
9213 count = 0;
9214 }
9215 NEXTL(l);
9216 cur = CUR_CHAR(l);
9217 }
9218 buf[len] = 0;
9219 ctxt->instate = XML_PARSER_CONTENT;
9220 if (cur != '>') {
9221 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9222 "CData section not finished\n%.50s\n", buf);
9223 xmlFree(buf);
9224 return;
9225 }
9226 NEXTL(l);
9227
9228 /*
9229 * OK the buffer is to be consumed as cdata.
9230 */
9231 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9232 if (ctxt->sax->cdataBlock != NULL)
9233 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9234 else if (ctxt->sax->characters != NULL)
9235 ctxt->sax->characters(ctxt->userData, buf, len);
9236 }
9237 xmlFree(buf);
9238 }
9239
9240 /**
9241 * xmlParseContent:
9242 * @ctxt: an XML parser context
9243 *
9244 * Parse a content:
9245 *
9246 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9247 */
9248
9249 void
xmlParseContent(xmlParserCtxtPtr ctxt)9250 xmlParseContent(xmlParserCtxtPtr ctxt) {
9251 GROW;
9252 while ((RAW != 0) &&
9253 ((RAW != '<') || (NXT(1) != '/')) &&
9254 (ctxt->instate != XML_PARSER_EOF)) {
9255 const xmlChar *test = CUR_PTR;
9256 unsigned int cons = ctxt->input->consumed;
9257 const xmlChar *cur = ctxt->input->cur;
9258
9259 /*
9260 * First case : a Processing Instruction.
9261 */
9262 if ((*cur == '<') && (cur[1] == '?')) {
9263 xmlParsePI(ctxt);
9264 }
9265
9266 /*
9267 * Second case : a CDSection
9268 */
9269 /* 2.6.0 test was *cur not RAW */
9270 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9271 xmlParseCDSect(ctxt);
9272 }
9273
9274 /*
9275 * Third case : a comment
9276 */
9277 else if ((*cur == '<') && (NXT(1) == '!') &&
9278 (NXT(2) == '-') && (NXT(3) == '-')) {
9279 xmlParseComment(ctxt);
9280 ctxt->instate = XML_PARSER_CONTENT;
9281 }
9282
9283 /*
9284 * Fourth case : a sub-element.
9285 */
9286 else if (*cur == '<') {
9287 xmlParseElement(ctxt);
9288 }
9289
9290 /*
9291 * Fifth case : a reference. If if has not been resolved,
9292 * parsing returns it's Name, create the node
9293 */
9294
9295 else if (*cur == '&') {
9296 xmlParseReference(ctxt);
9297 }
9298
9299 /*
9300 * Last case, text. Note that References are handled directly.
9301 */
9302 else {
9303 xmlParseCharData(ctxt, 0);
9304 }
9305
9306 GROW;
9307 /*
9308 * Pop-up of finished entities.
9309 */
9310 while ((RAW == 0) && (ctxt->inputNr > 1))
9311 xmlPopInput(ctxt);
9312 SHRINK;
9313
9314 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9315 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9316 "detected an error in element content\n");
9317 ctxt->instate = XML_PARSER_EOF;
9318 break;
9319 }
9320 }
9321 }
9322
9323 /**
9324 * xmlParseElement:
9325 * @ctxt: an XML parser context
9326 *
9327 * parse an XML element, this is highly recursive
9328 *
9329 * [39] element ::= EmptyElemTag | STag content ETag
9330 *
9331 * [ WFC: Element Type Match ]
9332 * The Name in an element's end-tag must match the element type in the
9333 * start-tag.
9334 *
9335 */
9336
9337 void
xmlParseElement(xmlParserCtxtPtr ctxt)9338 xmlParseElement(xmlParserCtxtPtr ctxt) {
9339 const xmlChar *name;
9340 const xmlChar *prefix;
9341 const xmlChar *URI;
9342 xmlParserNodeInfo node_info;
9343 int line, tlen;
9344 xmlNodePtr ret;
9345 int nsNr = ctxt->nsNr;
9346
9347 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9348 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9349 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9350 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9351 xmlParserMaxDepth);
9352 ctxt->instate = XML_PARSER_EOF;
9353 return;
9354 }
9355
9356 /* Capture start position */
9357 if (ctxt->record_info) {
9358 node_info.begin_pos = ctxt->input->consumed +
9359 (CUR_PTR - ctxt->input->base);
9360 node_info.begin_line = ctxt->input->line;
9361 }
9362
9363 if (ctxt->spaceNr == 0)
9364 spacePush(ctxt, -1);
9365 else if (*ctxt->space == -2)
9366 spacePush(ctxt, -1);
9367 else
9368 spacePush(ctxt, *ctxt->space);
9369
9370 line = ctxt->input->line;
9371 #ifdef LIBXML_SAX1_ENABLED
9372 if (ctxt->sax2)
9373 #endif /* LIBXML_SAX1_ENABLED */
9374 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9375 #ifdef LIBXML_SAX1_ENABLED
9376 else
9377 name = xmlParseStartTag(ctxt);
9378 #endif /* LIBXML_SAX1_ENABLED */
9379 if (name == NULL) {
9380 spacePop(ctxt);
9381 return;
9382 }
9383 namePush(ctxt, name);
9384 ret = ctxt->node;
9385
9386 #ifdef LIBXML_VALID_ENABLED
9387 /*
9388 * [ VC: Root Element Type ]
9389 * The Name in the document type declaration must match the element
9390 * type of the root element.
9391 */
9392 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9393 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9394 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9395 #endif /* LIBXML_VALID_ENABLED */
9396
9397 /*
9398 * Check for an Empty Element.
9399 */
9400 if ((RAW == '/') && (NXT(1) == '>')) {
9401 SKIP(2);
9402 if (ctxt->sax2) {
9403 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9404 (!ctxt->disableSAX))
9405 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9406 #ifdef LIBXML_SAX1_ENABLED
9407 } else {
9408 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9409 (!ctxt->disableSAX))
9410 ctxt->sax->endElement(ctxt->userData, name);
9411 #endif /* LIBXML_SAX1_ENABLED */
9412 }
9413 namePop(ctxt);
9414 spacePop(ctxt);
9415 if (nsNr != ctxt->nsNr)
9416 nsPop(ctxt, ctxt->nsNr - nsNr);
9417 if ( ret != NULL && ctxt->record_info ) {
9418 node_info.end_pos = ctxt->input->consumed +
9419 (CUR_PTR - ctxt->input->base);
9420 node_info.end_line = ctxt->input->line;
9421 node_info.node = ret;
9422 xmlParserAddNodeInfo(ctxt, &node_info);
9423 }
9424 return;
9425 }
9426 if (RAW == '>') {
9427 NEXT1;
9428 } else {
9429 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9430 "Couldn't find end of Start Tag %s line %d\n",
9431 name, line, NULL);
9432
9433 /*
9434 * end of parsing of this node.
9435 */
9436 nodePop(ctxt);
9437 namePop(ctxt);
9438 spacePop(ctxt);
9439 if (nsNr != ctxt->nsNr)
9440 nsPop(ctxt, ctxt->nsNr - nsNr);
9441
9442 /*
9443 * Capture end position and add node
9444 */
9445 if ( ret != NULL && ctxt->record_info ) {
9446 node_info.end_pos = ctxt->input->consumed +
9447 (CUR_PTR - ctxt->input->base);
9448 node_info.end_line = ctxt->input->line;
9449 node_info.node = ret;
9450 xmlParserAddNodeInfo(ctxt, &node_info);
9451 }
9452 return;
9453 }
9454
9455 /*
9456 * Parse the content of the element:
9457 */
9458 xmlParseContent(ctxt);
9459 if (!IS_BYTE_CHAR(RAW)) {
9460 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9461 "Premature end of data in tag %s line %d\n",
9462 name, line, NULL);
9463
9464 /*
9465 * end of parsing of this node.
9466 */
9467 nodePop(ctxt);
9468 namePop(ctxt);
9469 spacePop(ctxt);
9470 if (nsNr != ctxt->nsNr)
9471 nsPop(ctxt, ctxt->nsNr - nsNr);
9472 return;
9473 }
9474
9475 /*
9476 * parse the end of tag: '</' should be here.
9477 */
9478 if (ctxt->sax2) {
9479 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9480 namePop(ctxt);
9481 }
9482 #ifdef LIBXML_SAX1_ENABLED
9483 else
9484 xmlParseEndTag1(ctxt, line);
9485 #endif /* LIBXML_SAX1_ENABLED */
9486
9487 /*
9488 * Capture end position and add node
9489 */
9490 if ( ret != NULL && ctxt->record_info ) {
9491 node_info.end_pos = ctxt->input->consumed +
9492 (CUR_PTR - ctxt->input->base);
9493 node_info.end_line = ctxt->input->line;
9494 node_info.node = ret;
9495 xmlParserAddNodeInfo(ctxt, &node_info);
9496 }
9497 }
9498
9499 /**
9500 * xmlParseVersionNum:
9501 * @ctxt: an XML parser context
9502 *
9503 * parse the XML version value.
9504 *
9505 * [26] VersionNum ::= '1.' [0-9]+
9506 *
9507 * In practice allow [0-9].[0-9]+ at that level
9508 *
9509 * Returns the string giving the XML version number, or NULL
9510 */
9511 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9512 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9513 xmlChar *buf = NULL;
9514 int len = 0;
9515 int size = 10;
9516 xmlChar cur;
9517
9518 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9519 if (buf == NULL) {
9520 xmlErrMemory(ctxt, NULL);
9521 return(NULL);
9522 }
9523 cur = CUR;
9524 if (!((cur >= '0') && (cur <= '9'))) {
9525 xmlFree(buf);
9526 return(NULL);
9527 }
9528 buf[len++] = cur;
9529 NEXT;
9530 cur=CUR;
9531 if (cur != '.') {
9532 xmlFree(buf);
9533 return(NULL);
9534 }
9535 buf[len++] = cur;
9536 NEXT;
9537 cur=CUR;
9538 while ((cur >= '0') && (cur <= '9')) {
9539 if (len + 1 >= size) {
9540 xmlChar *tmp;
9541
9542 size *= 2;
9543 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9544 if (tmp == NULL) {
9545 xmlFree(buf);
9546 xmlErrMemory(ctxt, NULL);
9547 return(NULL);
9548 }
9549 buf = tmp;
9550 }
9551 buf[len++] = cur;
9552 NEXT;
9553 cur=CUR;
9554 }
9555 buf[len] = 0;
9556 return(buf);
9557 }
9558
9559 /**
9560 * xmlParseVersionInfo:
9561 * @ctxt: an XML parser context
9562 *
9563 * parse the XML version.
9564 *
9565 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9566 *
9567 * [25] Eq ::= S? '=' S?
9568 *
9569 * Returns the version string, e.g. "1.0"
9570 */
9571
9572 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9573 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9574 xmlChar *version = NULL;
9575
9576 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9577 SKIP(7);
9578 SKIP_BLANKS;
9579 if (RAW != '=') {
9580 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9581 return(NULL);
9582 }
9583 NEXT;
9584 SKIP_BLANKS;
9585 if (RAW == '"') {
9586 NEXT;
9587 version = xmlParseVersionNum(ctxt);
9588 if (RAW != '"') {
9589 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9590 } else
9591 NEXT;
9592 } else if (RAW == '\''){
9593 NEXT;
9594 version = xmlParseVersionNum(ctxt);
9595 if (RAW != '\'') {
9596 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9597 } else
9598 NEXT;
9599 } else {
9600 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9601 }
9602 }
9603 return(version);
9604 }
9605
9606 /**
9607 * xmlParseEncName:
9608 * @ctxt: an XML parser context
9609 *
9610 * parse the XML encoding name
9611 *
9612 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9613 *
9614 * Returns the encoding name value or NULL
9615 */
9616 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9617 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9618 xmlChar *buf = NULL;
9619 int len = 0;
9620 int size = 10;
9621 xmlChar cur;
9622
9623 cur = CUR;
9624 if (((cur >= 'a') && (cur <= 'z')) ||
9625 ((cur >= 'A') && (cur <= 'Z'))) {
9626 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9627 if (buf == NULL) {
9628 xmlErrMemory(ctxt, NULL);
9629 return(NULL);
9630 }
9631
9632 buf[len++] = cur;
9633 NEXT;
9634 cur = CUR;
9635 while (((cur >= 'a') && (cur <= 'z')) ||
9636 ((cur >= 'A') && (cur <= 'Z')) ||
9637 ((cur >= '0') && (cur <= '9')) ||
9638 (cur == '.') || (cur == '_') ||
9639 (cur == '-')) {
9640 if (len + 1 >= size) {
9641 xmlChar *tmp;
9642
9643 size *= 2;
9644 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9645 if (tmp == NULL) {
9646 xmlErrMemory(ctxt, NULL);
9647 xmlFree(buf);
9648 return(NULL);
9649 }
9650 buf = tmp;
9651 }
9652 buf[len++] = cur;
9653 NEXT;
9654 cur = CUR;
9655 if (cur == 0) {
9656 SHRINK;
9657 GROW;
9658 cur = CUR;
9659 }
9660 }
9661 buf[len] = 0;
9662 } else {
9663 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9664 }
9665 return(buf);
9666 }
9667
9668 /**
9669 * xmlParseEncodingDecl:
9670 * @ctxt: an XML parser context
9671 *
9672 * parse the XML encoding declaration
9673 *
9674 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9675 *
9676 * this setups the conversion filters.
9677 *
9678 * Returns the encoding value or NULL
9679 */
9680
9681 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9682 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9683 xmlChar *encoding = NULL;
9684
9685 SKIP_BLANKS;
9686 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9687 SKIP(8);
9688 SKIP_BLANKS;
9689 if (RAW != '=') {
9690 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9691 return(NULL);
9692 }
9693 NEXT;
9694 SKIP_BLANKS;
9695 if (RAW == '"') {
9696 NEXT;
9697 encoding = xmlParseEncName(ctxt);
9698 if (RAW != '"') {
9699 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9700 } else
9701 NEXT;
9702 } else if (RAW == '\''){
9703 NEXT;
9704 encoding = xmlParseEncName(ctxt);
9705 if (RAW != '\'') {
9706 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9707 } else
9708 NEXT;
9709 } else {
9710 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9711 }
9712 /*
9713 * UTF-16 encoding stwich has already taken place at this stage,
9714 * more over the little-endian/big-endian selection is already done
9715 */
9716 if ((encoding != NULL) &&
9717 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9718 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9719 /*
9720 * If no encoding was passed to the parser, that we are
9721 * using UTF-16 and no decoder is present i.e. the
9722 * document is apparently UTF-8 compatible, then raise an
9723 * encoding mismatch fatal error
9724 */
9725 if ((ctxt->encoding == NULL) &&
9726 (ctxt->input->buf != NULL) &&
9727 (ctxt->input->buf->encoder == NULL)) {
9728 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9729 "Document labelled UTF-16 but has UTF-8 content\n");
9730 }
9731 if (ctxt->encoding != NULL)
9732 xmlFree((xmlChar *) ctxt->encoding);
9733 ctxt->encoding = encoding;
9734 }
9735 /*
9736 * UTF-8 encoding is handled natively
9737 */
9738 else if ((encoding != NULL) &&
9739 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9740 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9741 if (ctxt->encoding != NULL)
9742 xmlFree((xmlChar *) ctxt->encoding);
9743 ctxt->encoding = encoding;
9744 }
9745 else if (encoding != NULL) {
9746 xmlCharEncodingHandlerPtr handler;
9747
9748 if (ctxt->input->encoding != NULL)
9749 xmlFree((xmlChar *) ctxt->input->encoding);
9750 ctxt->input->encoding = encoding;
9751
9752 handler = xmlFindCharEncodingHandler((const char *) encoding);
9753 if (handler != NULL) {
9754 xmlSwitchToEncoding(ctxt, handler);
9755 } else {
9756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9757 "Unsupported encoding %s\n", encoding);
9758 return(NULL);
9759 }
9760 }
9761 }
9762 return(encoding);
9763 }
9764
9765 /**
9766 * xmlParseSDDecl:
9767 * @ctxt: an XML parser context
9768 *
9769 * parse the XML standalone declaration
9770 *
9771 * [32] SDDecl ::= S 'standalone' Eq
9772 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9773 *
9774 * [ VC: Standalone Document Declaration ]
9775 * TODO The standalone document declaration must have the value "no"
9776 * if any external markup declarations contain declarations of:
9777 * - attributes with default values, if elements to which these
9778 * attributes apply appear in the document without specifications
9779 * of values for these attributes, or
9780 * - entities (other than amp, lt, gt, apos, quot), if references
9781 * to those entities appear in the document, or
9782 * - attributes with values subject to normalization, where the
9783 * attribute appears in the document with a value which will change
9784 * as a result of normalization, or
9785 * - element types with element content, if white space occurs directly
9786 * within any instance of those types.
9787 *
9788 * Returns:
9789 * 1 if standalone="yes"
9790 * 0 if standalone="no"
9791 * -2 if standalone attribute is missing or invalid
9792 * (A standalone value of -2 means that the XML declaration was found,
9793 * but no value was specified for the standalone attribute).
9794 */
9795
9796 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)9797 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9798 int standalone = -2;
9799
9800 SKIP_BLANKS;
9801 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9802 SKIP(10);
9803 SKIP_BLANKS;
9804 if (RAW != '=') {
9805 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9806 return(standalone);
9807 }
9808 NEXT;
9809 SKIP_BLANKS;
9810 if (RAW == '\''){
9811 NEXT;
9812 if ((RAW == 'n') && (NXT(1) == 'o')) {
9813 standalone = 0;
9814 SKIP(2);
9815 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9816 (NXT(2) == 's')) {
9817 standalone = 1;
9818 SKIP(3);
9819 } else {
9820 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9821 }
9822 if (RAW != '\'') {
9823 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9824 } else
9825 NEXT;
9826 } else if (RAW == '"'){
9827 NEXT;
9828 if ((RAW == 'n') && (NXT(1) == 'o')) {
9829 standalone = 0;
9830 SKIP(2);
9831 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9832 (NXT(2) == 's')) {
9833 standalone = 1;
9834 SKIP(3);
9835 } else {
9836 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9837 }
9838 if (RAW != '"') {
9839 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9840 } else
9841 NEXT;
9842 } else {
9843 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9844 }
9845 }
9846 return(standalone);
9847 }
9848
9849 /**
9850 * xmlParseXMLDecl:
9851 * @ctxt: an XML parser context
9852 *
9853 * parse an XML declaration header
9854 *
9855 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9856 */
9857
9858 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)9859 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9860 xmlChar *version;
9861
9862 /*
9863 * This value for standalone indicates that the document has an
9864 * XML declaration but it does not have a standalone attribute.
9865 * It will be overwritten later if a standalone attribute is found.
9866 */
9867 ctxt->input->standalone = -2;
9868
9869 /*
9870 * We know that '<?xml' is here.
9871 */
9872 SKIP(5);
9873
9874 if (!IS_BLANK_CH(RAW)) {
9875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9876 "Blank needed after '<?xml'\n");
9877 }
9878 SKIP_BLANKS;
9879
9880 /*
9881 * We must have the VersionInfo here.
9882 */
9883 version = xmlParseVersionInfo(ctxt);
9884 if (version == NULL) {
9885 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9886 } else {
9887 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9888 /*
9889 * Changed here for XML-1.0 5th edition
9890 */
9891 if (ctxt->options & XML_PARSE_OLD10) {
9892 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9893 "Unsupported version '%s'\n",
9894 version);
9895 } else {
9896 if ((version[0] == '1') && ((version[1] == '.'))) {
9897 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9898 "Unsupported version '%s'\n",
9899 version, NULL);
9900 } else {
9901 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9902 "Unsupported version '%s'\n",
9903 version);
9904 }
9905 }
9906 }
9907 if (ctxt->version != NULL)
9908 xmlFree((void *) ctxt->version);
9909 ctxt->version = version;
9910 }
9911
9912 /*
9913 * We may have the encoding declaration
9914 */
9915 if (!IS_BLANK_CH(RAW)) {
9916 if ((RAW == '?') && (NXT(1) == '>')) {
9917 SKIP(2);
9918 return;
9919 }
9920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9921 }
9922 xmlParseEncodingDecl(ctxt);
9923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9924 /*
9925 * The XML REC instructs us to stop parsing right here
9926 */
9927 return;
9928 }
9929
9930 /*
9931 * We may have the standalone status.
9932 */
9933 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9934 if ((RAW == '?') && (NXT(1) == '>')) {
9935 SKIP(2);
9936 return;
9937 }
9938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9939 }
9940 SKIP_BLANKS;
9941 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9942
9943 SKIP_BLANKS;
9944 if ((RAW == '?') && (NXT(1) == '>')) {
9945 SKIP(2);
9946 } else if (RAW == '>') {
9947 /* Deprecated old WD ... */
9948 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9949 NEXT;
9950 } else {
9951 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9952 MOVETO_ENDTAG(CUR_PTR);
9953 NEXT;
9954 }
9955 }
9956
9957 /**
9958 * xmlParseMisc:
9959 * @ctxt: an XML parser context
9960 *
9961 * parse an XML Misc* optional field.
9962 *
9963 * [27] Misc ::= Comment | PI | S
9964 */
9965
9966 void
xmlParseMisc(xmlParserCtxtPtr ctxt)9967 xmlParseMisc(xmlParserCtxtPtr ctxt) {
9968 while (((RAW == '<') && (NXT(1) == '?')) ||
9969 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9970 IS_BLANK_CH(CUR)) {
9971 if ((RAW == '<') && (NXT(1) == '?')) {
9972 xmlParsePI(ctxt);
9973 } else if (IS_BLANK_CH(CUR)) {
9974 NEXT;
9975 } else
9976 xmlParseComment(ctxt);
9977 }
9978 }
9979
9980 /**
9981 * xmlParseDocument:
9982 * @ctxt: an XML parser context
9983 *
9984 * parse an XML document (and build a tree if using the standard SAX
9985 * interface).
9986 *
9987 * [1] document ::= prolog element Misc*
9988 *
9989 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9990 *
9991 * Returns 0, -1 in case of error. the parser context is augmented
9992 * as a result of the parsing.
9993 */
9994
9995 int
xmlParseDocument(xmlParserCtxtPtr ctxt)9996 xmlParseDocument(xmlParserCtxtPtr ctxt) {
9997 xmlChar start[4];
9998 xmlCharEncoding enc;
9999
10000 xmlInitParser();
10001
10002 if ((ctxt == NULL) || (ctxt->input == NULL))
10003 return(-1);
10004
10005 GROW;
10006
10007 /*
10008 * SAX: detecting the level.
10009 */
10010 xmlDetectSAX2(ctxt);
10011
10012 /*
10013 * SAX: beginning of the document processing.
10014 */
10015 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10016 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10017
10018 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10019 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10020 /*
10021 * Get the 4 first bytes and decode the charset
10022 * if enc != XML_CHAR_ENCODING_NONE
10023 * plug some encoding conversion routines.
10024 */
10025 start[0] = RAW;
10026 start[1] = NXT(1);
10027 start[2] = NXT(2);
10028 start[3] = NXT(3);
10029 enc = xmlDetectCharEncoding(&start[0], 4);
10030 if (enc != XML_CHAR_ENCODING_NONE) {
10031 xmlSwitchEncoding(ctxt, enc);
10032 }
10033 }
10034
10035
10036 if (CUR == 0) {
10037 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10038 }
10039
10040 /*
10041 * Check for the XMLDecl in the Prolog.
10042 */
10043 GROW;
10044 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10045
10046 /*
10047 * Note that we will switch encoding on the fly.
10048 */
10049 xmlParseXMLDecl(ctxt);
10050 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10051 /*
10052 * The XML REC instructs us to stop parsing right here
10053 */
10054 return(-1);
10055 }
10056 ctxt->standalone = ctxt->input->standalone;
10057 SKIP_BLANKS;
10058 } else {
10059 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10060 }
10061 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10062 ctxt->sax->startDocument(ctxt->userData);
10063
10064 /*
10065 * The Misc part of the Prolog
10066 */
10067 GROW;
10068 xmlParseMisc(ctxt);
10069
10070 /*
10071 * Then possibly doc type declaration(s) and more Misc
10072 * (doctypedecl Misc*)?
10073 */
10074 GROW;
10075 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10076
10077 ctxt->inSubset = 1;
10078 xmlParseDocTypeDecl(ctxt);
10079 if (RAW == '[') {
10080 ctxt->instate = XML_PARSER_DTD;
10081 xmlParseInternalSubset(ctxt);
10082 }
10083
10084 /*
10085 * Create and update the external subset.
10086 */
10087 ctxt->inSubset = 2;
10088 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10089 (!ctxt->disableSAX))
10090 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10091 ctxt->extSubSystem, ctxt->extSubURI);
10092 ctxt->inSubset = 0;
10093
10094 xmlCleanSpecialAttr(ctxt);
10095
10096 ctxt->instate = XML_PARSER_PROLOG;
10097 xmlParseMisc(ctxt);
10098 }
10099
10100 /*
10101 * Time to start parsing the tree itself
10102 */
10103 GROW;
10104 if (RAW != '<') {
10105 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10106 "Start tag expected, '<' not found\n");
10107 } else {
10108 ctxt->instate = XML_PARSER_CONTENT;
10109 xmlParseElement(ctxt);
10110 ctxt->instate = XML_PARSER_EPILOG;
10111
10112
10113 /*
10114 * The Misc part at the end
10115 */
10116 xmlParseMisc(ctxt);
10117
10118 if (RAW != 0) {
10119 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10120 }
10121 ctxt->instate = XML_PARSER_EOF;
10122 }
10123
10124 /*
10125 * SAX: end of the document processing.
10126 */
10127 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10128 ctxt->sax->endDocument(ctxt->userData);
10129
10130 /*
10131 * Remove locally kept entity definitions if the tree was not built
10132 */
10133 if ((ctxt->myDoc != NULL) &&
10134 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10135 xmlFreeDoc(ctxt->myDoc);
10136 ctxt->myDoc = NULL;
10137 }
10138
10139 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10140 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10141 if (ctxt->valid)
10142 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10143 if (ctxt->nsWellFormed)
10144 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10145 if (ctxt->options & XML_PARSE_OLD10)
10146 ctxt->myDoc->properties |= XML_DOC_OLD10;
10147 }
10148 if (! ctxt->wellFormed) {
10149 ctxt->valid = 0;
10150 return(-1);
10151 }
10152 return(0);
10153 }
10154
10155 /**
10156 * xmlParseExtParsedEnt:
10157 * @ctxt: an XML parser context
10158 *
10159 * parse a general parsed entity
10160 * An external general parsed entity is well-formed if it matches the
10161 * production labeled extParsedEnt.
10162 *
10163 * [78] extParsedEnt ::= TextDecl? content
10164 *
10165 * Returns 0, -1 in case of error. the parser context is augmented
10166 * as a result of the parsing.
10167 */
10168
10169 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10170 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10171 xmlChar start[4];
10172 xmlCharEncoding enc;
10173
10174 if ((ctxt == NULL) || (ctxt->input == NULL))
10175 return(-1);
10176
10177 xmlDefaultSAXHandlerInit();
10178
10179 xmlDetectSAX2(ctxt);
10180
10181 GROW;
10182
10183 /*
10184 * SAX: beginning of the document processing.
10185 */
10186 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10187 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10188
10189 /*
10190 * Get the 4 first bytes and decode the charset
10191 * if enc != XML_CHAR_ENCODING_NONE
10192 * plug some encoding conversion routines.
10193 */
10194 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10195 start[0] = RAW;
10196 start[1] = NXT(1);
10197 start[2] = NXT(2);
10198 start[3] = NXT(3);
10199 enc = xmlDetectCharEncoding(start, 4);
10200 if (enc != XML_CHAR_ENCODING_NONE) {
10201 xmlSwitchEncoding(ctxt, enc);
10202 }
10203 }
10204
10205
10206 if (CUR == 0) {
10207 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10208 }
10209
10210 /*
10211 * Check for the XMLDecl in the Prolog.
10212 */
10213 GROW;
10214 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10215
10216 /*
10217 * Note that we will switch encoding on the fly.
10218 */
10219 xmlParseXMLDecl(ctxt);
10220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10221 /*
10222 * The XML REC instructs us to stop parsing right here
10223 */
10224 return(-1);
10225 }
10226 SKIP_BLANKS;
10227 } else {
10228 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10229 }
10230 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10231 ctxt->sax->startDocument(ctxt->userData);
10232
10233 /*
10234 * Doing validity checking on chunk doesn't make sense
10235 */
10236 ctxt->instate = XML_PARSER_CONTENT;
10237 ctxt->validate = 0;
10238 ctxt->loadsubset = 0;
10239 ctxt->depth = 0;
10240
10241 xmlParseContent(ctxt);
10242
10243 if ((RAW == '<') && (NXT(1) == '/')) {
10244 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10245 } else if (RAW != 0) {
10246 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10247 }
10248
10249 /*
10250 * SAX: end of the document processing.
10251 */
10252 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10253 ctxt->sax->endDocument(ctxt->userData);
10254
10255 if (! ctxt->wellFormed) return(-1);
10256 return(0);
10257 }
10258
10259 #ifdef LIBXML_PUSH_ENABLED
10260 /************************************************************************
10261 * *
10262 * Progressive parsing interfaces *
10263 * *
10264 ************************************************************************/
10265
10266 /**
10267 * xmlParseLookupSequence:
10268 * @ctxt: an XML parser context
10269 * @first: the first char to lookup
10270 * @next: the next char to lookup or zero
10271 * @third: the next char to lookup or zero
10272 *
10273 * Try to find if a sequence (first, next, third) or just (first next) or
10274 * (first) is available in the input stream.
10275 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10276 * to avoid rescanning sequences of bytes, it DOES change the state of the
10277 * parser, do not use liberally.
10278 *
10279 * Returns the index to the current parsing point if the full sequence
10280 * is available, -1 otherwise.
10281 */
10282 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10283 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10284 xmlChar next, xmlChar third) {
10285 int base, len;
10286 xmlParserInputPtr in;
10287 const xmlChar *buf;
10288
10289 in = ctxt->input;
10290 if (in == NULL) return(-1);
10291 base = in->cur - in->base;
10292 if (base < 0) return(-1);
10293 if (ctxt->checkIndex > base)
10294 base = ctxt->checkIndex;
10295 if (in->buf == NULL) {
10296 buf = in->base;
10297 len = in->length;
10298 } else {
10299 buf = in->buf->buffer->content;
10300 len = in->buf->buffer->use;
10301 }
10302 /* take into account the sequence length */
10303 if (third) len -= 2;
10304 else if (next) len --;
10305 for (;base < len;base++) {
10306 if (buf[base] == first) {
10307 if (third != 0) {
10308 if ((buf[base + 1] != next) ||
10309 (buf[base + 2] != third)) continue;
10310 } else if (next != 0) {
10311 if (buf[base + 1] != next) continue;
10312 }
10313 ctxt->checkIndex = 0;
10314 #ifdef DEBUG_PUSH
10315 if (next == 0)
10316 xmlGenericError(xmlGenericErrorContext,
10317 "PP: lookup '%c' found at %d\n",
10318 first, base);
10319 else if (third == 0)
10320 xmlGenericError(xmlGenericErrorContext,
10321 "PP: lookup '%c%c' found at %d\n",
10322 first, next, base);
10323 else
10324 xmlGenericError(xmlGenericErrorContext,
10325 "PP: lookup '%c%c%c' found at %d\n",
10326 first, next, third, base);
10327 #endif
10328 return(base - (in->cur - in->base));
10329 }
10330 }
10331 ctxt->checkIndex = base;
10332 #ifdef DEBUG_PUSH
10333 if (next == 0)
10334 xmlGenericError(xmlGenericErrorContext,
10335 "PP: lookup '%c' failed\n", first);
10336 else if (third == 0)
10337 xmlGenericError(xmlGenericErrorContext,
10338 "PP: lookup '%c%c' failed\n", first, next);
10339 else
10340 xmlGenericError(xmlGenericErrorContext,
10341 "PP: lookup '%c%c%c' failed\n", first, next, third);
10342 #endif
10343 return(-1);
10344 }
10345
10346 /**
10347 * xmlParseGetLasts:
10348 * @ctxt: an XML parser context
10349 * @lastlt: pointer to store the last '<' from the input
10350 * @lastgt: pointer to store the last '>' from the input
10351 *
10352 * Lookup the last < and > in the current chunk
10353 */
10354 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10355 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10356 const xmlChar **lastgt) {
10357 const xmlChar *tmp;
10358
10359 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10360 xmlGenericError(xmlGenericErrorContext,
10361 "Internal error: xmlParseGetLasts\n");
10362 return;
10363 }
10364 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10365 tmp = ctxt->input->end;
10366 tmp--;
10367 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10368 if (tmp < ctxt->input->base) {
10369 *lastlt = NULL;
10370 *lastgt = NULL;
10371 } else {
10372 *lastlt = tmp;
10373 tmp++;
10374 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10375 if (*tmp == '\'') {
10376 tmp++;
10377 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10378 if (tmp < ctxt->input->end) tmp++;
10379 } else if (*tmp == '"') {
10380 tmp++;
10381 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10382 if (tmp < ctxt->input->end) tmp++;
10383 } else
10384 tmp++;
10385 }
10386 if (tmp < ctxt->input->end)
10387 *lastgt = tmp;
10388 else {
10389 tmp = *lastlt;
10390 tmp--;
10391 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10392 if (tmp >= ctxt->input->base)
10393 *lastgt = tmp;
10394 else
10395 *lastgt = NULL;
10396 }
10397 }
10398 } else {
10399 *lastlt = NULL;
10400 *lastgt = NULL;
10401 }
10402 }
10403 /**
10404 * xmlCheckCdataPush:
10405 * @cur: pointer to the bock of characters
10406 * @len: length of the block in bytes
10407 *
10408 * Check that the block of characters is okay as SCdata content [20]
10409 *
10410 * Returns the number of bytes to pass if okay, a negative index where an
10411 * UTF-8 error occured otherwise
10412 */
10413 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10414 xmlCheckCdataPush(const xmlChar *utf, int len) {
10415 int ix;
10416 unsigned char c;
10417 int codepoint;
10418
10419 if ((utf == NULL) || (len <= 0))
10420 return(0);
10421
10422 for (ix = 0; ix < len;) { /* string is 0-terminated */
10423 c = utf[ix];
10424 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10425 if (c >= 0x20)
10426 ix++;
10427 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10428 ix++;
10429 else
10430 return(-ix);
10431 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10432 if (ix + 2 > len) return(ix);
10433 if ((utf[ix+1] & 0xc0 ) != 0x80)
10434 return(-ix);
10435 codepoint = (utf[ix] & 0x1f) << 6;
10436 codepoint |= utf[ix+1] & 0x3f;
10437 if (!xmlIsCharQ(codepoint))
10438 return(-ix);
10439 ix += 2;
10440 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10441 if (ix + 3 > len) return(ix);
10442 if (((utf[ix+1] & 0xc0) != 0x80) ||
10443 ((utf[ix+2] & 0xc0) != 0x80))
10444 return(-ix);
10445 codepoint = (utf[ix] & 0xf) << 12;
10446 codepoint |= (utf[ix+1] & 0x3f) << 6;
10447 codepoint |= utf[ix+2] & 0x3f;
10448 if (!xmlIsCharQ(codepoint))
10449 return(-ix);
10450 ix += 3;
10451 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10452 if (ix + 4 > len) return(ix);
10453 if (((utf[ix+1] & 0xc0) != 0x80) ||
10454 ((utf[ix+2] & 0xc0) != 0x80) ||
10455 ((utf[ix+3] & 0xc0) != 0x80))
10456 return(-ix);
10457 codepoint = (utf[ix] & 0x7) << 18;
10458 codepoint |= (utf[ix+1] & 0x3f) << 12;
10459 codepoint |= (utf[ix+2] & 0x3f) << 6;
10460 codepoint |= utf[ix+3] & 0x3f;
10461 if (!xmlIsCharQ(codepoint))
10462 return(-ix);
10463 ix += 4;
10464 } else /* unknown encoding */
10465 return(-ix);
10466 }
10467 return(ix);
10468 }
10469
10470 /**
10471 * xmlParseTryOrFinish:
10472 * @ctxt: an XML parser context
10473 * @terminate: last chunk indicator
10474 *
10475 * Try to progress on parsing
10476 *
10477 * Returns zero if no parsing was possible
10478 */
10479 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10480 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10481 int ret = 0;
10482 int avail, tlen;
10483 xmlChar cur, next;
10484 const xmlChar *lastlt, *lastgt;
10485
10486 if (ctxt->input == NULL)
10487 return(0);
10488
10489 #ifdef DEBUG_PUSH
10490 switch (ctxt->instate) {
10491 case XML_PARSER_EOF:
10492 xmlGenericError(xmlGenericErrorContext,
10493 "PP: try EOF\n"); break;
10494 case XML_PARSER_START:
10495 xmlGenericError(xmlGenericErrorContext,
10496 "PP: try START\n"); break;
10497 case XML_PARSER_MISC:
10498 xmlGenericError(xmlGenericErrorContext,
10499 "PP: try MISC\n");break;
10500 case XML_PARSER_COMMENT:
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: try COMMENT\n");break;
10503 case XML_PARSER_PROLOG:
10504 xmlGenericError(xmlGenericErrorContext,
10505 "PP: try PROLOG\n");break;
10506 case XML_PARSER_START_TAG:
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: try START_TAG\n");break;
10509 case XML_PARSER_CONTENT:
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: try CONTENT\n");break;
10512 case XML_PARSER_CDATA_SECTION:
10513 xmlGenericError(xmlGenericErrorContext,
10514 "PP: try CDATA_SECTION\n");break;
10515 case XML_PARSER_END_TAG:
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: try END_TAG\n");break;
10518 case XML_PARSER_ENTITY_DECL:
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: try ENTITY_DECL\n");break;
10521 case XML_PARSER_ENTITY_VALUE:
10522 xmlGenericError(xmlGenericErrorContext,
10523 "PP: try ENTITY_VALUE\n");break;
10524 case XML_PARSER_ATTRIBUTE_VALUE:
10525 xmlGenericError(xmlGenericErrorContext,
10526 "PP: try ATTRIBUTE_VALUE\n");break;
10527 case XML_PARSER_DTD:
10528 xmlGenericError(xmlGenericErrorContext,
10529 "PP: try DTD\n");break;
10530 case XML_PARSER_EPILOG:
10531 xmlGenericError(xmlGenericErrorContext,
10532 "PP: try EPILOG\n");break;
10533 case XML_PARSER_PI:
10534 xmlGenericError(xmlGenericErrorContext,
10535 "PP: try PI\n");break;
10536 case XML_PARSER_IGNORE:
10537 xmlGenericError(xmlGenericErrorContext,
10538 "PP: try IGNORE\n");break;
10539 }
10540 #endif
10541
10542 if ((ctxt->input != NULL) &&
10543 (ctxt->input->cur - ctxt->input->base > 4096)) {
10544 xmlSHRINK(ctxt);
10545 ctxt->checkIndex = 0;
10546 }
10547 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10548
10549 while (1) {
10550 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10551 return(0);
10552
10553
10554 /*
10555 * Pop-up of finished entities.
10556 */
10557 while ((RAW == 0) && (ctxt->inputNr > 1))
10558 xmlPopInput(ctxt);
10559
10560 if (ctxt->input == NULL) break;
10561 if (ctxt->input->buf == NULL)
10562 avail = ctxt->input->length -
10563 (ctxt->input->cur - ctxt->input->base);
10564 else {
10565 /*
10566 * If we are operating on converted input, try to flush
10567 * remainng chars to avoid them stalling in the non-converted
10568 * buffer.
10569 */
10570 if ((ctxt->input->buf->raw != NULL) &&
10571 (ctxt->input->buf->raw->use > 0)) {
10572 int base = ctxt->input->base -
10573 ctxt->input->buf->buffer->content;
10574 int current = ctxt->input->cur - ctxt->input->base;
10575
10576 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10577 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10578 ctxt->input->cur = ctxt->input->base + current;
10579 ctxt->input->end =
10580 &ctxt->input->buf->buffer->content[
10581 ctxt->input->buf->buffer->use];
10582 }
10583 avail = ctxt->input->buf->buffer->use -
10584 (ctxt->input->cur - ctxt->input->base);
10585 }
10586 if (avail < 1)
10587 goto done;
10588 switch (ctxt->instate) {
10589 case XML_PARSER_EOF:
10590 /*
10591 * Document parsing is done !
10592 */
10593 goto done;
10594 case XML_PARSER_START:
10595 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10596 xmlChar start[4];
10597 xmlCharEncoding enc;
10598
10599 /*
10600 * Very first chars read from the document flow.
10601 */
10602 if (avail < 4)
10603 goto done;
10604
10605 /*
10606 * Get the 4 first bytes and decode the charset
10607 * if enc != XML_CHAR_ENCODING_NONE
10608 * plug some encoding conversion routines,
10609 * else xmlSwitchEncoding will set to (default)
10610 * UTF8.
10611 */
10612 start[0] = RAW;
10613 start[1] = NXT(1);
10614 start[2] = NXT(2);
10615 start[3] = NXT(3);
10616 enc = xmlDetectCharEncoding(start, 4);
10617 xmlSwitchEncoding(ctxt, enc);
10618 break;
10619 }
10620
10621 if (avail < 2)
10622 goto done;
10623 cur = ctxt->input->cur[0];
10624 next = ctxt->input->cur[1];
10625 if (cur == 0) {
10626 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10627 ctxt->sax->setDocumentLocator(ctxt->userData,
10628 &xmlDefaultSAXLocator);
10629 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10630 ctxt->instate = XML_PARSER_EOF;
10631 #ifdef DEBUG_PUSH
10632 xmlGenericError(xmlGenericErrorContext,
10633 "PP: entering EOF\n");
10634 #endif
10635 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10636 ctxt->sax->endDocument(ctxt->userData);
10637 goto done;
10638 }
10639 if ((cur == '<') && (next == '?')) {
10640 /* PI or XML decl */
10641 if (avail < 5) return(ret);
10642 if ((!terminate) &&
10643 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10644 return(ret);
10645 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10646 ctxt->sax->setDocumentLocator(ctxt->userData,
10647 &xmlDefaultSAXLocator);
10648 if ((ctxt->input->cur[2] == 'x') &&
10649 (ctxt->input->cur[3] == 'm') &&
10650 (ctxt->input->cur[4] == 'l') &&
10651 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10652 ret += 5;
10653 #ifdef DEBUG_PUSH
10654 xmlGenericError(xmlGenericErrorContext,
10655 "PP: Parsing XML Decl\n");
10656 #endif
10657 xmlParseXMLDecl(ctxt);
10658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10659 /*
10660 * The XML REC instructs us to stop parsing right
10661 * here
10662 */
10663 ctxt->instate = XML_PARSER_EOF;
10664 return(0);
10665 }
10666 ctxt->standalone = ctxt->input->standalone;
10667 if ((ctxt->encoding == NULL) &&
10668 (ctxt->input->encoding != NULL))
10669 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10670 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10671 (!ctxt->disableSAX))
10672 ctxt->sax->startDocument(ctxt->userData);
10673 ctxt->instate = XML_PARSER_MISC;
10674 #ifdef DEBUG_PUSH
10675 xmlGenericError(xmlGenericErrorContext,
10676 "PP: entering MISC\n");
10677 #endif
10678 } else {
10679 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10680 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10681 (!ctxt->disableSAX))
10682 ctxt->sax->startDocument(ctxt->userData);
10683 ctxt->instate = XML_PARSER_MISC;
10684 #ifdef DEBUG_PUSH
10685 xmlGenericError(xmlGenericErrorContext,
10686 "PP: entering MISC\n");
10687 #endif
10688 }
10689 } else {
10690 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10691 ctxt->sax->setDocumentLocator(ctxt->userData,
10692 &xmlDefaultSAXLocator);
10693 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10694 if (ctxt->version == NULL) {
10695 xmlErrMemory(ctxt, NULL);
10696 break;
10697 }
10698 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10699 (!ctxt->disableSAX))
10700 ctxt->sax->startDocument(ctxt->userData);
10701 ctxt->instate = XML_PARSER_MISC;
10702 #ifdef DEBUG_PUSH
10703 xmlGenericError(xmlGenericErrorContext,
10704 "PP: entering MISC\n");
10705 #endif
10706 }
10707 break;
10708 case XML_PARSER_START_TAG: {
10709 const xmlChar *name;
10710 const xmlChar *prefix;
10711 const xmlChar *URI;
10712 int nsNr = ctxt->nsNr;
10713
10714 if ((avail < 2) && (ctxt->inputNr == 1))
10715 goto done;
10716 cur = ctxt->input->cur[0];
10717 if (cur != '<') {
10718 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10719 ctxt->instate = XML_PARSER_EOF;
10720 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10721 ctxt->sax->endDocument(ctxt->userData);
10722 goto done;
10723 }
10724 if (!terminate) {
10725 if (ctxt->progressive) {
10726 /* > can be found unescaped in attribute values */
10727 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10728 goto done;
10729 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10730 goto done;
10731 }
10732 }
10733 if (ctxt->spaceNr == 0)
10734 spacePush(ctxt, -1);
10735 else if (*ctxt->space == -2)
10736 spacePush(ctxt, -1);
10737 else
10738 spacePush(ctxt, *ctxt->space);
10739 #ifdef LIBXML_SAX1_ENABLED
10740 if (ctxt->sax2)
10741 #endif /* LIBXML_SAX1_ENABLED */
10742 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10743 #ifdef LIBXML_SAX1_ENABLED
10744 else
10745 name = xmlParseStartTag(ctxt);
10746 #endif /* LIBXML_SAX1_ENABLED */
10747 if (name == NULL) {
10748 spacePop(ctxt);
10749 ctxt->instate = XML_PARSER_EOF;
10750 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10751 ctxt->sax->endDocument(ctxt->userData);
10752 goto done;
10753 }
10754 #ifdef LIBXML_VALID_ENABLED
10755 /*
10756 * [ VC: Root Element Type ]
10757 * The Name in the document type declaration must match
10758 * the element type of the root element.
10759 */
10760 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10761 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10762 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10763 #endif /* LIBXML_VALID_ENABLED */
10764
10765 /*
10766 * Check for an Empty Element.
10767 */
10768 if ((RAW == '/') && (NXT(1) == '>')) {
10769 SKIP(2);
10770
10771 if (ctxt->sax2) {
10772 if ((ctxt->sax != NULL) &&
10773 (ctxt->sax->endElementNs != NULL) &&
10774 (!ctxt->disableSAX))
10775 ctxt->sax->endElementNs(ctxt->userData, name,
10776 prefix, URI);
10777 if (ctxt->nsNr - nsNr > 0)
10778 nsPop(ctxt, ctxt->nsNr - nsNr);
10779 #ifdef LIBXML_SAX1_ENABLED
10780 } else {
10781 if ((ctxt->sax != NULL) &&
10782 (ctxt->sax->endElement != NULL) &&
10783 (!ctxt->disableSAX))
10784 ctxt->sax->endElement(ctxt->userData, name);
10785 #endif /* LIBXML_SAX1_ENABLED */
10786 }
10787 spacePop(ctxt);
10788 if (ctxt->nameNr == 0) {
10789 ctxt->instate = XML_PARSER_EPILOG;
10790 } else {
10791 ctxt->instate = XML_PARSER_CONTENT;
10792 }
10793 break;
10794 }
10795 if (RAW == '>') {
10796 NEXT;
10797 } else {
10798 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10799 "Couldn't find end of Start Tag %s\n",
10800 name);
10801 nodePop(ctxt);
10802 spacePop(ctxt);
10803 }
10804 if (ctxt->sax2)
10805 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10806 #ifdef LIBXML_SAX1_ENABLED
10807 else
10808 namePush(ctxt, name);
10809 #endif /* LIBXML_SAX1_ENABLED */
10810
10811 ctxt->instate = XML_PARSER_CONTENT;
10812 break;
10813 }
10814 case XML_PARSER_CONTENT: {
10815 const xmlChar *test;
10816 unsigned int cons;
10817 if ((avail < 2) && (ctxt->inputNr == 1))
10818 goto done;
10819 cur = ctxt->input->cur[0];
10820 next = ctxt->input->cur[1];
10821
10822 test = CUR_PTR;
10823 cons = ctxt->input->consumed;
10824 if ((cur == '<') && (next == '/')) {
10825 ctxt->instate = XML_PARSER_END_TAG;
10826 break;
10827 } else if ((cur == '<') && (next == '?')) {
10828 if ((!terminate) &&
10829 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10830 goto done;
10831 xmlParsePI(ctxt);
10832 } else if ((cur == '<') && (next != '!')) {
10833 ctxt->instate = XML_PARSER_START_TAG;
10834 break;
10835 } else if ((cur == '<') && (next == '!') &&
10836 (ctxt->input->cur[2] == '-') &&
10837 (ctxt->input->cur[3] == '-')) {
10838 int term;
10839
10840 if (avail < 4)
10841 goto done;
10842 ctxt->input->cur += 4;
10843 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10844 ctxt->input->cur -= 4;
10845 if ((!terminate) && (term < 0))
10846 goto done;
10847 xmlParseComment(ctxt);
10848 ctxt->instate = XML_PARSER_CONTENT;
10849 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10850 (ctxt->input->cur[2] == '[') &&
10851 (ctxt->input->cur[3] == 'C') &&
10852 (ctxt->input->cur[4] == 'D') &&
10853 (ctxt->input->cur[5] == 'A') &&
10854 (ctxt->input->cur[6] == 'T') &&
10855 (ctxt->input->cur[7] == 'A') &&
10856 (ctxt->input->cur[8] == '[')) {
10857 SKIP(9);
10858 ctxt->instate = XML_PARSER_CDATA_SECTION;
10859 break;
10860 } else if ((cur == '<') && (next == '!') &&
10861 (avail < 9)) {
10862 goto done;
10863 } else if (cur == '&') {
10864 if ((!terminate) &&
10865 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10866 goto done;
10867 xmlParseReference(ctxt);
10868 } else {
10869 /* TODO Avoid the extra copy, handle directly !!! */
10870 /*
10871 * Goal of the following test is:
10872 * - minimize calls to the SAX 'character' callback
10873 * when they are mergeable
10874 * - handle an problem for isBlank when we only parse
10875 * a sequence of blank chars and the next one is
10876 * not available to check against '<' presence.
10877 * - tries to homogenize the differences in SAX
10878 * callbacks between the push and pull versions
10879 * of the parser.
10880 */
10881 if ((ctxt->inputNr == 1) &&
10882 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10883 if (!terminate) {
10884 if (ctxt->progressive) {
10885 if ((lastlt == NULL) ||
10886 (ctxt->input->cur > lastlt))
10887 goto done;
10888 } else if (xmlParseLookupSequence(ctxt,
10889 '<', 0, 0) < 0) {
10890 goto done;
10891 }
10892 }
10893 }
10894 ctxt->checkIndex = 0;
10895 xmlParseCharData(ctxt, 0);
10896 }
10897 /*
10898 * Pop-up of finished entities.
10899 */
10900 while ((RAW == 0) && (ctxt->inputNr > 1))
10901 xmlPopInput(ctxt);
10902 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10903 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10904 "detected an error in element content\n");
10905 ctxt->instate = XML_PARSER_EOF;
10906 break;
10907 }
10908 break;
10909 }
10910 case XML_PARSER_END_TAG:
10911 if (avail < 2)
10912 goto done;
10913 if (!terminate) {
10914 if (ctxt->progressive) {
10915 /* > can be found unescaped in attribute values */
10916 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10917 goto done;
10918 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10919 goto done;
10920 }
10921 }
10922 if (ctxt->sax2) {
10923 xmlParseEndTag2(ctxt,
10924 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10925 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10926 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10927 nameNsPop(ctxt);
10928 }
10929 #ifdef LIBXML_SAX1_ENABLED
10930 else
10931 xmlParseEndTag1(ctxt, 0);
10932 #endif /* LIBXML_SAX1_ENABLED */
10933 if (ctxt->nameNr == 0) {
10934 ctxt->instate = XML_PARSER_EPILOG;
10935 } else {
10936 ctxt->instate = XML_PARSER_CONTENT;
10937 }
10938 break;
10939 case XML_PARSER_CDATA_SECTION: {
10940 /*
10941 * The Push mode need to have the SAX callback for
10942 * cdataBlock merge back contiguous callbacks.
10943 */
10944 int base;
10945
10946 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10947 if (base < 0) {
10948 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10949 int tmp;
10950
10951 tmp = xmlCheckCdataPush(ctxt->input->cur,
10952 XML_PARSER_BIG_BUFFER_SIZE);
10953 if (tmp < 0) {
10954 tmp = -tmp;
10955 ctxt->input->cur += tmp;
10956 goto encoding_error;
10957 }
10958 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10959 if (ctxt->sax->cdataBlock != NULL)
10960 ctxt->sax->cdataBlock(ctxt->userData,
10961 ctxt->input->cur, tmp);
10962 else if (ctxt->sax->characters != NULL)
10963 ctxt->sax->characters(ctxt->userData,
10964 ctxt->input->cur, tmp);
10965 }
10966 SKIPL(tmp);
10967 ctxt->checkIndex = 0;
10968 }
10969 goto done;
10970 } else {
10971 int tmp;
10972
10973 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10974 if ((tmp < 0) || (tmp != base)) {
10975 tmp = -tmp;
10976 ctxt->input->cur += tmp;
10977 goto encoding_error;
10978 }
10979 if ((ctxt->sax != NULL) && (base == 0) &&
10980 (ctxt->sax->cdataBlock != NULL) &&
10981 (!ctxt->disableSAX)) {
10982 /*
10983 * Special case to provide identical behaviour
10984 * between pull and push parsers on enpty CDATA
10985 * sections
10986 */
10987 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10988 (!strncmp((const char *)&ctxt->input->cur[-9],
10989 "<![CDATA[", 9)))
10990 ctxt->sax->cdataBlock(ctxt->userData,
10991 BAD_CAST "", 0);
10992 } else if ((ctxt->sax != NULL) && (base > 0) &&
10993 (!ctxt->disableSAX)) {
10994 if (ctxt->sax->cdataBlock != NULL)
10995 ctxt->sax->cdataBlock(ctxt->userData,
10996 ctxt->input->cur, base);
10997 else if (ctxt->sax->characters != NULL)
10998 ctxt->sax->characters(ctxt->userData,
10999 ctxt->input->cur, base);
11000 }
11001 SKIPL(base + 3);
11002 ctxt->checkIndex = 0;
11003 ctxt->instate = XML_PARSER_CONTENT;
11004 #ifdef DEBUG_PUSH
11005 xmlGenericError(xmlGenericErrorContext,
11006 "PP: entering CONTENT\n");
11007 #endif
11008 }
11009 break;
11010 }
11011 case XML_PARSER_MISC:
11012 SKIP_BLANKS;
11013 if (ctxt->input->buf == NULL)
11014 avail = ctxt->input->length -
11015 (ctxt->input->cur - ctxt->input->base);
11016 else
11017 avail = ctxt->input->buf->buffer->use -
11018 (ctxt->input->cur - ctxt->input->base);
11019 if (avail < 2)
11020 goto done;
11021 cur = ctxt->input->cur[0];
11022 next = ctxt->input->cur[1];
11023 if ((cur == '<') && (next == '?')) {
11024 if ((!terminate) &&
11025 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11026 goto done;
11027 #ifdef DEBUG_PUSH
11028 xmlGenericError(xmlGenericErrorContext,
11029 "PP: Parsing PI\n");
11030 #endif
11031 xmlParsePI(ctxt);
11032 ctxt->checkIndex = 0;
11033 } else if ((cur == '<') && (next == '!') &&
11034 (ctxt->input->cur[2] == '-') &&
11035 (ctxt->input->cur[3] == '-')) {
11036 if ((!terminate) &&
11037 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11038 goto done;
11039 #ifdef DEBUG_PUSH
11040 xmlGenericError(xmlGenericErrorContext,
11041 "PP: Parsing Comment\n");
11042 #endif
11043 xmlParseComment(ctxt);
11044 ctxt->instate = XML_PARSER_MISC;
11045 ctxt->checkIndex = 0;
11046 } else if ((cur == '<') && (next == '!') &&
11047 (ctxt->input->cur[2] == 'D') &&
11048 (ctxt->input->cur[3] == 'O') &&
11049 (ctxt->input->cur[4] == 'C') &&
11050 (ctxt->input->cur[5] == 'T') &&
11051 (ctxt->input->cur[6] == 'Y') &&
11052 (ctxt->input->cur[7] == 'P') &&
11053 (ctxt->input->cur[8] == 'E')) {
11054 if ((!terminate) &&
11055 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11056 goto done;
11057 #ifdef DEBUG_PUSH
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: Parsing internal subset\n");
11060 #endif
11061 ctxt->inSubset = 1;
11062 xmlParseDocTypeDecl(ctxt);
11063 if (RAW == '[') {
11064 ctxt->instate = XML_PARSER_DTD;
11065 #ifdef DEBUG_PUSH
11066 xmlGenericError(xmlGenericErrorContext,
11067 "PP: entering DTD\n");
11068 #endif
11069 } else {
11070 /*
11071 * Create and update the external subset.
11072 */
11073 ctxt->inSubset = 2;
11074 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11075 (ctxt->sax->externalSubset != NULL))
11076 ctxt->sax->externalSubset(ctxt->userData,
11077 ctxt->intSubName, ctxt->extSubSystem,
11078 ctxt->extSubURI);
11079 ctxt->inSubset = 0;
11080 xmlCleanSpecialAttr(ctxt);
11081 ctxt->instate = XML_PARSER_PROLOG;
11082 #ifdef DEBUG_PUSH
11083 xmlGenericError(xmlGenericErrorContext,
11084 "PP: entering PROLOG\n");
11085 #endif
11086 }
11087 } else if ((cur == '<') && (next == '!') &&
11088 (avail < 9)) {
11089 goto done;
11090 } else {
11091 ctxt->instate = XML_PARSER_START_TAG;
11092 ctxt->progressive = 1;
11093 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11094 #ifdef DEBUG_PUSH
11095 xmlGenericError(xmlGenericErrorContext,
11096 "PP: entering START_TAG\n");
11097 #endif
11098 }
11099 break;
11100 case XML_PARSER_PROLOG:
11101 SKIP_BLANKS;
11102 if (ctxt->input->buf == NULL)
11103 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11104 else
11105 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11106 if (avail < 2)
11107 goto done;
11108 cur = ctxt->input->cur[0];
11109 next = ctxt->input->cur[1];
11110 if ((cur == '<') && (next == '?')) {
11111 if ((!terminate) &&
11112 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11113 goto done;
11114 #ifdef DEBUG_PUSH
11115 xmlGenericError(xmlGenericErrorContext,
11116 "PP: Parsing PI\n");
11117 #endif
11118 xmlParsePI(ctxt);
11119 } else if ((cur == '<') && (next == '!') &&
11120 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11121 if ((!terminate) &&
11122 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11123 goto done;
11124 #ifdef DEBUG_PUSH
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: Parsing Comment\n");
11127 #endif
11128 xmlParseComment(ctxt);
11129 ctxt->instate = XML_PARSER_PROLOG;
11130 } else if ((cur == '<') && (next == '!') &&
11131 (avail < 4)) {
11132 goto done;
11133 } else {
11134 ctxt->instate = XML_PARSER_START_TAG;
11135 if (ctxt->progressive == 0)
11136 ctxt->progressive = 1;
11137 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11138 #ifdef DEBUG_PUSH
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: entering START_TAG\n");
11141 #endif
11142 }
11143 break;
11144 case XML_PARSER_EPILOG:
11145 SKIP_BLANKS;
11146 if (ctxt->input->buf == NULL)
11147 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11148 else
11149 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11150 if (avail < 2)
11151 goto done;
11152 cur = ctxt->input->cur[0];
11153 next = ctxt->input->cur[1];
11154 if ((cur == '<') && (next == '?')) {
11155 if ((!terminate) &&
11156 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11157 goto done;
11158 #ifdef DEBUG_PUSH
11159 xmlGenericError(xmlGenericErrorContext,
11160 "PP: Parsing PI\n");
11161 #endif
11162 xmlParsePI(ctxt);
11163 ctxt->instate = XML_PARSER_EPILOG;
11164 } else if ((cur == '<') && (next == '!') &&
11165 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11166 if ((!terminate) &&
11167 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11168 goto done;
11169 #ifdef DEBUG_PUSH
11170 xmlGenericError(xmlGenericErrorContext,
11171 "PP: Parsing Comment\n");
11172 #endif
11173 xmlParseComment(ctxt);
11174 ctxt->instate = XML_PARSER_EPILOG;
11175 } else if ((cur == '<') && (next == '!') &&
11176 (avail < 4)) {
11177 goto done;
11178 } else {
11179 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11180 ctxt->instate = XML_PARSER_EOF;
11181 #ifdef DEBUG_PUSH
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: entering EOF\n");
11184 #endif
11185 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11186 ctxt->sax->endDocument(ctxt->userData);
11187 goto done;
11188 }
11189 break;
11190 case XML_PARSER_DTD: {
11191 /*
11192 * Sorry but progressive parsing of the internal subset
11193 * is not expected to be supported. We first check that
11194 * the full content of the internal subset is available and
11195 * the parsing is launched only at that point.
11196 * Internal subset ends up with "']' S? '>'" in an unescaped
11197 * section and not in a ']]>' sequence which are conditional
11198 * sections (whoever argued to keep that crap in XML deserve
11199 * a place in hell !).
11200 */
11201 int base, i;
11202 xmlChar *buf;
11203 xmlChar quote = 0;
11204
11205 base = ctxt->input->cur - ctxt->input->base;
11206 if (base < 0) return(0);
11207 if (ctxt->checkIndex > base)
11208 base = ctxt->checkIndex;
11209 buf = ctxt->input->buf->buffer->content;
11210 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11211 base++) {
11212 if (quote != 0) {
11213 if (buf[base] == quote)
11214 quote = 0;
11215 continue;
11216 }
11217 if ((quote == 0) && (buf[base] == '<')) {
11218 int found = 0;
11219 /* special handling of comments */
11220 if (((unsigned int) base + 4 <
11221 ctxt->input->buf->buffer->use) &&
11222 (buf[base + 1] == '!') &&
11223 (buf[base + 2] == '-') &&
11224 (buf[base + 3] == '-')) {
11225 for (;(unsigned int) base + 3 <
11226 ctxt->input->buf->buffer->use; base++) {
11227 if ((buf[base] == '-') &&
11228 (buf[base + 1] == '-') &&
11229 (buf[base + 2] == '>')) {
11230 found = 1;
11231 base += 2;
11232 break;
11233 }
11234 }
11235 if (!found) {
11236 #if 0
11237 fprintf(stderr, "unfinished comment\n");
11238 #endif
11239 break; /* for */
11240 }
11241 continue;
11242 }
11243 }
11244 if (buf[base] == '"') {
11245 quote = '"';
11246 continue;
11247 }
11248 if (buf[base] == '\'') {
11249 quote = '\'';
11250 continue;
11251 }
11252 if (buf[base] == ']') {
11253 #if 0
11254 fprintf(stderr, "%c%c%c%c: ", buf[base],
11255 buf[base + 1], buf[base + 2], buf[base + 3]);
11256 #endif
11257 if ((unsigned int) base +1 >=
11258 ctxt->input->buf->buffer->use)
11259 break;
11260 if (buf[base + 1] == ']') {
11261 /* conditional crap, skip both ']' ! */
11262 base++;
11263 continue;
11264 }
11265 for (i = 1;
11266 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11267 i++) {
11268 if (buf[base + i] == '>') {
11269 #if 0
11270 fprintf(stderr, "found\n");
11271 #endif
11272 goto found_end_int_subset;
11273 }
11274 if (!IS_BLANK_CH(buf[base + i])) {
11275 #if 0
11276 fprintf(stderr, "not found\n");
11277 #endif
11278 goto not_end_of_int_subset;
11279 }
11280 }
11281 #if 0
11282 fprintf(stderr, "end of stream\n");
11283 #endif
11284 break;
11285
11286 }
11287 not_end_of_int_subset:
11288 continue; /* for */
11289 }
11290 /*
11291 * We didn't found the end of the Internal subset
11292 */
11293 #ifdef DEBUG_PUSH
11294 if (next == 0)
11295 xmlGenericError(xmlGenericErrorContext,
11296 "PP: lookup of int subset end filed\n");
11297 #endif
11298 goto done;
11299
11300 found_end_int_subset:
11301 xmlParseInternalSubset(ctxt);
11302 ctxt->inSubset = 2;
11303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11304 (ctxt->sax->externalSubset != NULL))
11305 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11306 ctxt->extSubSystem, ctxt->extSubURI);
11307 ctxt->inSubset = 0;
11308 xmlCleanSpecialAttr(ctxt);
11309 ctxt->instate = XML_PARSER_PROLOG;
11310 ctxt->checkIndex = 0;
11311 #ifdef DEBUG_PUSH
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: entering PROLOG\n");
11314 #endif
11315 break;
11316 }
11317 case XML_PARSER_COMMENT:
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: internal error, state == COMMENT\n");
11320 ctxt->instate = XML_PARSER_CONTENT;
11321 #ifdef DEBUG_PUSH
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: entering CONTENT\n");
11324 #endif
11325 break;
11326 case XML_PARSER_IGNORE:
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: internal error, state == IGNORE");
11329 ctxt->instate = XML_PARSER_DTD;
11330 #ifdef DEBUG_PUSH
11331 xmlGenericError(xmlGenericErrorContext,
11332 "PP: entering DTD\n");
11333 #endif
11334 break;
11335 case XML_PARSER_PI:
11336 xmlGenericError(xmlGenericErrorContext,
11337 "PP: internal error, state == PI\n");
11338 ctxt->instate = XML_PARSER_CONTENT;
11339 #ifdef DEBUG_PUSH
11340 xmlGenericError(xmlGenericErrorContext,
11341 "PP: entering CONTENT\n");
11342 #endif
11343 break;
11344 case XML_PARSER_ENTITY_DECL:
11345 xmlGenericError(xmlGenericErrorContext,
11346 "PP: internal error, state == ENTITY_DECL\n");
11347 ctxt->instate = XML_PARSER_DTD;
11348 #ifdef DEBUG_PUSH
11349 xmlGenericError(xmlGenericErrorContext,
11350 "PP: entering DTD\n");
11351 #endif
11352 break;
11353 case XML_PARSER_ENTITY_VALUE:
11354 xmlGenericError(xmlGenericErrorContext,
11355 "PP: internal error, state == ENTITY_VALUE\n");
11356 ctxt->instate = XML_PARSER_CONTENT;
11357 #ifdef DEBUG_PUSH
11358 xmlGenericError(xmlGenericErrorContext,
11359 "PP: entering DTD\n");
11360 #endif
11361 break;
11362 case XML_PARSER_ATTRIBUTE_VALUE:
11363 xmlGenericError(xmlGenericErrorContext,
11364 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11365 ctxt->instate = XML_PARSER_START_TAG;
11366 #ifdef DEBUG_PUSH
11367 xmlGenericError(xmlGenericErrorContext,
11368 "PP: entering START_TAG\n");
11369 #endif
11370 break;
11371 case XML_PARSER_SYSTEM_LITERAL:
11372 xmlGenericError(xmlGenericErrorContext,
11373 "PP: internal error, state == SYSTEM_LITERAL\n");
11374 ctxt->instate = XML_PARSER_START_TAG;
11375 #ifdef DEBUG_PUSH
11376 xmlGenericError(xmlGenericErrorContext,
11377 "PP: entering START_TAG\n");
11378 #endif
11379 break;
11380 case XML_PARSER_PUBLIC_LITERAL:
11381 xmlGenericError(xmlGenericErrorContext,
11382 "PP: internal error, state == PUBLIC_LITERAL\n");
11383 ctxt->instate = XML_PARSER_START_TAG;
11384 #ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: entering START_TAG\n");
11387 #endif
11388 break;
11389 }
11390 }
11391 done:
11392 #ifdef DEBUG_PUSH
11393 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11394 #endif
11395 return(ret);
11396 encoding_error:
11397 {
11398 char buffer[150];
11399
11400 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11401 ctxt->input->cur[0], ctxt->input->cur[1],
11402 ctxt->input->cur[2], ctxt->input->cur[3]);
11403 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11404 "Input is not proper UTF-8, indicate encoding !\n%s",
11405 BAD_CAST buffer, NULL);
11406 }
11407 return(0);
11408 }
11409
11410 /**
11411 * xmlParseChunk:
11412 * @ctxt: an XML parser context
11413 * @chunk: an char array
11414 * @size: the size in byte of the chunk
11415 * @terminate: last chunk indicator
11416 *
11417 * Parse a Chunk of memory
11418 *
11419 * Returns zero if no error, the xmlParserErrors otherwise.
11420 */
11421 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11422 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11423 int terminate) {
11424 int end_in_lf = 0;
11425
11426 if (ctxt == NULL)
11427 return(XML_ERR_INTERNAL_ERROR);
11428 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11429 return(ctxt->errNo);
11430 if (ctxt->instate == XML_PARSER_START)
11431 xmlDetectSAX2(ctxt);
11432 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11433 (chunk[size - 1] == '\r')) {
11434 end_in_lf = 1;
11435 size--;
11436 }
11437 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11438 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11439 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11440 int cur = ctxt->input->cur - ctxt->input->base;
11441 int res;
11442
11443 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11444 if (res < 0) {
11445 ctxt->errNo = XML_PARSER_EOF;
11446 ctxt->disableSAX = 1;
11447 return (XML_PARSER_EOF);
11448 }
11449 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11450 ctxt->input->cur = ctxt->input->base + cur;
11451 ctxt->input->end =
11452 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11453 #ifdef DEBUG_PUSH
11454 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11455 #endif
11456
11457 } else if (ctxt->instate != XML_PARSER_EOF) {
11458 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11459 xmlParserInputBufferPtr in = ctxt->input->buf;
11460 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11461 (in->raw != NULL)) {
11462 int nbchars;
11463
11464 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11465 if (nbchars < 0) {
11466 /* TODO 2.6.0 */
11467 xmlGenericError(xmlGenericErrorContext,
11468 "xmlParseChunk: encoder error\n");
11469 return(XML_ERR_INVALID_ENCODING);
11470 }
11471 }
11472 }
11473 }
11474 xmlParseTryOrFinish(ctxt, terminate);
11475 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11476 (ctxt->input->buf != NULL)) {
11477 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11478 }
11479 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11480 return(ctxt->errNo);
11481 if (terminate) {
11482 /*
11483 * Check for termination
11484 */
11485 int avail = 0;
11486
11487 if (ctxt->input != NULL) {
11488 if (ctxt->input->buf == NULL)
11489 avail = ctxt->input->length -
11490 (ctxt->input->cur - ctxt->input->base);
11491 else
11492 avail = ctxt->input->buf->buffer->use -
11493 (ctxt->input->cur - ctxt->input->base);
11494 }
11495
11496 if ((ctxt->instate != XML_PARSER_EOF) &&
11497 (ctxt->instate != XML_PARSER_EPILOG)) {
11498 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11499 }
11500 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11501 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11502 }
11503 if (ctxt->instate != XML_PARSER_EOF) {
11504 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11505 ctxt->sax->endDocument(ctxt->userData);
11506 }
11507 ctxt->instate = XML_PARSER_EOF;
11508 }
11509 return((xmlParserErrors) ctxt->errNo);
11510 }
11511
11512 /************************************************************************
11513 * *
11514 * I/O front end functions to the parser *
11515 * *
11516 ************************************************************************/
11517
11518 /**
11519 * xmlCreatePushParserCtxt:
11520 * @sax: a SAX handler
11521 * @user_data: The user data returned on SAX callbacks
11522 * @chunk: a pointer to an array of chars
11523 * @size: number of chars in the array
11524 * @filename: an optional file name or URI
11525 *
11526 * Create a parser context for using the XML parser in push mode.
11527 * If @buffer and @size are non-NULL, the data is used to detect
11528 * the encoding. The remaining characters will be parsed so they
11529 * don't need to be fed in again through xmlParseChunk.
11530 * To allow content encoding detection, @size should be >= 4
11531 * The value of @filename is used for fetching external entities
11532 * and error/warning reports.
11533 *
11534 * Returns the new parser context or NULL
11535 */
11536
11537 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11538 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11539 const char *chunk, int size, const char *filename) {
11540 xmlParserCtxtPtr ctxt;
11541 xmlParserInputPtr inputStream;
11542 xmlParserInputBufferPtr buf;
11543 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11544
11545 /*
11546 * plug some encoding conversion routines
11547 */
11548 if ((chunk != NULL) && (size >= 4))
11549 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11550
11551 buf = xmlAllocParserInputBuffer(enc);
11552 if (buf == NULL) return(NULL);
11553
11554 ctxt = xmlNewParserCtxt();
11555 if (ctxt == NULL) {
11556 xmlErrMemory(NULL, "creating parser: out of memory\n");
11557 xmlFreeParserInputBuffer(buf);
11558 return(NULL);
11559 }
11560 ctxt->dictNames = 1;
11561 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11562 if (ctxt->pushTab == NULL) {
11563 xmlErrMemory(ctxt, NULL);
11564 xmlFreeParserInputBuffer(buf);
11565 xmlFreeParserCtxt(ctxt);
11566 return(NULL);
11567 }
11568 if (sax != NULL) {
11569 #ifdef LIBXML_SAX1_ENABLED
11570 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11571 #endif /* LIBXML_SAX1_ENABLED */
11572 xmlFree(ctxt->sax);
11573 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11574 if (ctxt->sax == NULL) {
11575 xmlErrMemory(ctxt, NULL);
11576 xmlFreeParserInputBuffer(buf);
11577 xmlFreeParserCtxt(ctxt);
11578 return(NULL);
11579 }
11580 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11581 if (sax->initialized == XML_SAX2_MAGIC)
11582 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11583 else
11584 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11585 if (user_data != NULL)
11586 ctxt->userData = user_data;
11587 }
11588 if (filename == NULL) {
11589 ctxt->directory = NULL;
11590 } else {
11591 ctxt->directory = xmlParserGetDirectory(filename);
11592 }
11593
11594 inputStream = xmlNewInputStream(ctxt);
11595 if (inputStream == NULL) {
11596 xmlFreeParserCtxt(ctxt);
11597 xmlFreeParserInputBuffer(buf);
11598 return(NULL);
11599 }
11600
11601 if (filename == NULL)
11602 inputStream->filename = NULL;
11603 else {
11604 inputStream->filename = (char *)
11605 xmlCanonicPath((const xmlChar *) filename);
11606 if (inputStream->filename == NULL) {
11607 xmlFreeParserCtxt(ctxt);
11608 xmlFreeParserInputBuffer(buf);
11609 return(NULL);
11610 }
11611 }
11612 inputStream->buf = buf;
11613 inputStream->base = inputStream->buf->buffer->content;
11614 inputStream->cur = inputStream->buf->buffer->content;
11615 inputStream->end =
11616 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11617
11618 inputPush(ctxt, inputStream);
11619
11620 /*
11621 * If the caller didn't provide an initial 'chunk' for determining
11622 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11623 * that it can be automatically determined later
11624 */
11625 if ((size == 0) || (chunk == NULL)) {
11626 ctxt->charset = XML_CHAR_ENCODING_NONE;
11627 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11628 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11629 int cur = ctxt->input->cur - ctxt->input->base;
11630
11631 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11632
11633 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11634 ctxt->input->cur = ctxt->input->base + cur;
11635 ctxt->input->end =
11636 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11637 #ifdef DEBUG_PUSH
11638 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11639 #endif
11640 }
11641
11642 if (enc != XML_CHAR_ENCODING_NONE) {
11643 xmlSwitchEncoding(ctxt, enc);
11644 }
11645
11646 return(ctxt);
11647 }
11648 #endif /* LIBXML_PUSH_ENABLED */
11649
11650 /**
11651 * xmlStopParser:
11652 * @ctxt: an XML parser context
11653 *
11654 * Blocks further parser processing
11655 */
11656 void
xmlStopParser(xmlParserCtxtPtr ctxt)11657 xmlStopParser(xmlParserCtxtPtr ctxt) {
11658 if (ctxt == NULL)
11659 return;
11660 ctxt->instate = XML_PARSER_EOF;
11661 ctxt->disableSAX = 1;
11662 if (ctxt->input != NULL) {
11663 ctxt->input->cur = BAD_CAST"";
11664 ctxt->input->base = ctxt->input->cur;
11665 }
11666 }
11667
11668 /**
11669 * xmlCreateIOParserCtxt:
11670 * @sax: a SAX handler
11671 * @user_data: The user data returned on SAX callbacks
11672 * @ioread: an I/O read function
11673 * @ioclose: an I/O close function
11674 * @ioctx: an I/O handler
11675 * @enc: the charset encoding if known
11676 *
11677 * Create a parser context for using the XML parser with an existing
11678 * I/O stream
11679 *
11680 * Returns the new parser context or NULL
11681 */
11682 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11683 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11684 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11685 void *ioctx, xmlCharEncoding enc) {
11686 xmlParserCtxtPtr ctxt;
11687 xmlParserInputPtr inputStream;
11688 xmlParserInputBufferPtr buf;
11689
11690 if (ioread == NULL) return(NULL);
11691
11692 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11693 if (buf == NULL) return(NULL);
11694
11695 ctxt = xmlNewParserCtxt();
11696 if (ctxt == NULL) {
11697 xmlFreeParserInputBuffer(buf);
11698 return(NULL);
11699 }
11700 if (sax != NULL) {
11701 #ifdef LIBXML_SAX1_ENABLED
11702 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11703 #endif /* LIBXML_SAX1_ENABLED */
11704 xmlFree(ctxt->sax);
11705 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11706 if (ctxt->sax == NULL) {
11707 xmlErrMemory(ctxt, NULL);
11708 xmlFreeParserCtxt(ctxt);
11709 return(NULL);
11710 }
11711 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11712 if (sax->initialized == XML_SAX2_MAGIC)
11713 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11714 else
11715 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11716 if (user_data != NULL)
11717 ctxt->userData = user_data;
11718 }
11719
11720 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11721 if (inputStream == NULL) {
11722 xmlFreeParserCtxt(ctxt);
11723 return(NULL);
11724 }
11725 inputPush(ctxt, inputStream);
11726
11727 return(ctxt);
11728 }
11729
11730 #ifdef LIBXML_VALID_ENABLED
11731 /************************************************************************
11732 * *
11733 * Front ends when parsing a DTD *
11734 * *
11735 ************************************************************************/
11736
11737 /**
11738 * xmlIOParseDTD:
11739 * @sax: the SAX handler block or NULL
11740 * @input: an Input Buffer
11741 * @enc: the charset encoding if known
11742 *
11743 * Load and parse a DTD
11744 *
11745 * Returns the resulting xmlDtdPtr or NULL in case of error.
11746 * @input will be freed by the function in any case.
11747 */
11748
11749 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11750 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11751 xmlCharEncoding enc) {
11752 xmlDtdPtr ret = NULL;
11753 xmlParserCtxtPtr ctxt;
11754 xmlParserInputPtr pinput = NULL;
11755 xmlChar start[4];
11756
11757 if (input == NULL)
11758 return(NULL);
11759
11760 ctxt = xmlNewParserCtxt();
11761 if (ctxt == NULL) {
11762 xmlFreeParserInputBuffer(input);
11763 return(NULL);
11764 }
11765
11766 /*
11767 * Set-up the SAX context
11768 */
11769 if (sax != NULL) {
11770 if (ctxt->sax != NULL)
11771 xmlFree(ctxt->sax);
11772 ctxt->sax = sax;
11773 ctxt->userData = ctxt;
11774 }
11775 xmlDetectSAX2(ctxt);
11776
11777 /*
11778 * generate a parser input from the I/O handler
11779 */
11780
11781 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11782 if (pinput == NULL) {
11783 if (sax != NULL) ctxt->sax = NULL;
11784 xmlFreeParserInputBuffer(input);
11785 xmlFreeParserCtxt(ctxt);
11786 return(NULL);
11787 }
11788
11789 /*
11790 * plug some encoding conversion routines here.
11791 */
11792 if (xmlPushInput(ctxt, pinput) < 0) {
11793 if (sax != NULL) ctxt->sax = NULL;
11794 xmlFreeParserCtxt(ctxt);
11795 return(NULL);
11796 }
11797 if (enc != XML_CHAR_ENCODING_NONE) {
11798 xmlSwitchEncoding(ctxt, enc);
11799 }
11800
11801 pinput->filename = NULL;
11802 pinput->line = 1;
11803 pinput->col = 1;
11804 pinput->base = ctxt->input->cur;
11805 pinput->cur = ctxt->input->cur;
11806 pinput->free = NULL;
11807
11808 /*
11809 * let's parse that entity knowing it's an external subset.
11810 */
11811 ctxt->inSubset = 2;
11812 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11813 if (ctxt->myDoc == NULL) {
11814 xmlErrMemory(ctxt, "New Doc failed");
11815 return(NULL);
11816 }
11817 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11818 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11819 BAD_CAST "none", BAD_CAST "none");
11820
11821 if ((enc == XML_CHAR_ENCODING_NONE) &&
11822 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11823 /*
11824 * Get the 4 first bytes and decode the charset
11825 * if enc != XML_CHAR_ENCODING_NONE
11826 * plug some encoding conversion routines.
11827 */
11828 start[0] = RAW;
11829 start[1] = NXT(1);
11830 start[2] = NXT(2);
11831 start[3] = NXT(3);
11832 enc = xmlDetectCharEncoding(start, 4);
11833 if (enc != XML_CHAR_ENCODING_NONE) {
11834 xmlSwitchEncoding(ctxt, enc);
11835 }
11836 }
11837
11838 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11839
11840 if (ctxt->myDoc != NULL) {
11841 if (ctxt->wellFormed) {
11842 ret = ctxt->myDoc->extSubset;
11843 ctxt->myDoc->extSubset = NULL;
11844 if (ret != NULL) {
11845 xmlNodePtr tmp;
11846
11847 ret->doc = NULL;
11848 tmp = ret->children;
11849 while (tmp != NULL) {
11850 tmp->doc = NULL;
11851 tmp = tmp->next;
11852 }
11853 }
11854 } else {
11855 ret = NULL;
11856 }
11857 xmlFreeDoc(ctxt->myDoc);
11858 ctxt->myDoc = NULL;
11859 }
11860 if (sax != NULL) ctxt->sax = NULL;
11861 xmlFreeParserCtxt(ctxt);
11862
11863 return(ret);
11864 }
11865
11866 /**
11867 * xmlSAXParseDTD:
11868 * @sax: the SAX handler block
11869 * @ExternalID: a NAME* containing the External ID of the DTD
11870 * @SystemID: a NAME* containing the URL to the DTD
11871 *
11872 * Load and parse an external subset.
11873 *
11874 * Returns the resulting xmlDtdPtr or NULL in case of error.
11875 */
11876
11877 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11878 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11879 const xmlChar *SystemID) {
11880 xmlDtdPtr ret = NULL;
11881 xmlParserCtxtPtr ctxt;
11882 xmlParserInputPtr input = NULL;
11883 xmlCharEncoding enc;
11884 xmlChar* systemIdCanonic;
11885
11886 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11887
11888 ctxt = xmlNewParserCtxt();
11889 if (ctxt == NULL) {
11890 return(NULL);
11891 }
11892
11893 /*
11894 * Set-up the SAX context
11895 */
11896 if (sax != NULL) {
11897 if (ctxt->sax != NULL)
11898 xmlFree(ctxt->sax);
11899 ctxt->sax = sax;
11900 ctxt->userData = ctxt;
11901 }
11902
11903 /*
11904 * Canonicalise the system ID
11905 */
11906 systemIdCanonic = xmlCanonicPath(SystemID);
11907 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11908 xmlFreeParserCtxt(ctxt);
11909 return(NULL);
11910 }
11911
11912 /*
11913 * Ask the Entity resolver to load the damn thing
11914 */
11915
11916 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11917 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11918 systemIdCanonic);
11919 if (input == NULL) {
11920 if (sax != NULL) ctxt->sax = NULL;
11921 xmlFreeParserCtxt(ctxt);
11922 if (systemIdCanonic != NULL)
11923 xmlFree(systemIdCanonic);
11924 return(NULL);
11925 }
11926
11927 /*
11928 * plug some encoding conversion routines here.
11929 */
11930 if (xmlPushInput(ctxt, input) < 0) {
11931 if (sax != NULL) ctxt->sax = NULL;
11932 xmlFreeParserCtxt(ctxt);
11933 if (systemIdCanonic != NULL)
11934 xmlFree(systemIdCanonic);
11935 return(NULL);
11936 }
11937 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11938 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11939 xmlSwitchEncoding(ctxt, enc);
11940 }
11941
11942 if (input->filename == NULL)
11943 input->filename = (char *) systemIdCanonic;
11944 else
11945 xmlFree(systemIdCanonic);
11946 input->line = 1;
11947 input->col = 1;
11948 input->base = ctxt->input->cur;
11949 input->cur = ctxt->input->cur;
11950 input->free = NULL;
11951
11952 /*
11953 * let's parse that entity knowing it's an external subset.
11954 */
11955 ctxt->inSubset = 2;
11956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11957 if (ctxt->myDoc == NULL) {
11958 xmlErrMemory(ctxt, "New Doc failed");
11959 if (sax != NULL) ctxt->sax = NULL;
11960 xmlFreeParserCtxt(ctxt);
11961 return(NULL);
11962 }
11963 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11964 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11965 ExternalID, SystemID);
11966 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11967
11968 if (ctxt->myDoc != NULL) {
11969 if (ctxt->wellFormed) {
11970 ret = ctxt->myDoc->extSubset;
11971 ctxt->myDoc->extSubset = NULL;
11972 if (ret != NULL) {
11973 xmlNodePtr tmp;
11974
11975 ret->doc = NULL;
11976 tmp = ret->children;
11977 while (tmp != NULL) {
11978 tmp->doc = NULL;
11979 tmp = tmp->next;
11980 }
11981 }
11982 } else {
11983 ret = NULL;
11984 }
11985 xmlFreeDoc(ctxt->myDoc);
11986 ctxt->myDoc = NULL;
11987 }
11988 if (sax != NULL) ctxt->sax = NULL;
11989 xmlFreeParserCtxt(ctxt);
11990
11991 return(ret);
11992 }
11993
11994
11995 /**
11996 * xmlParseDTD:
11997 * @ExternalID: a NAME* containing the External ID of the DTD
11998 * @SystemID: a NAME* containing the URL to the DTD
11999 *
12000 * Load and parse an external subset.
12001 *
12002 * Returns the resulting xmlDtdPtr or NULL in case of error.
12003 */
12004
12005 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12006 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12007 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12008 }
12009 #endif /* LIBXML_VALID_ENABLED */
12010
12011 /************************************************************************
12012 * *
12013 * Front ends when parsing an Entity *
12014 * *
12015 ************************************************************************/
12016
12017 /**
12018 * xmlParseCtxtExternalEntity:
12019 * @ctx: the existing parsing context
12020 * @URL: the URL for the entity to load
12021 * @ID: the System ID for the entity to load
12022 * @lst: the return value for the set of parsed nodes
12023 *
12024 * Parse an external general entity within an existing parsing context
12025 * An external general parsed entity is well-formed if it matches the
12026 * production labeled extParsedEnt.
12027 *
12028 * [78] extParsedEnt ::= TextDecl? content
12029 *
12030 * Returns 0 if the entity is well formed, -1 in case of args problem and
12031 * the parser error code otherwise
12032 */
12033
12034 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12035 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12036 const xmlChar *ID, xmlNodePtr *lst) {
12037 xmlParserCtxtPtr ctxt;
12038 xmlDocPtr newDoc;
12039 xmlNodePtr newRoot;
12040 xmlSAXHandlerPtr oldsax = NULL;
12041 int ret = 0;
12042 xmlChar start[4];
12043 xmlCharEncoding enc;
12044 xmlParserInputPtr inputStream;
12045 char *directory = NULL;
12046
12047 if (ctx == NULL) return(-1);
12048
12049 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12050 (ctx->depth > 1024)) {
12051 return(XML_ERR_ENTITY_LOOP);
12052 }
12053
12054 if (lst != NULL)
12055 *lst = NULL;
12056 if ((URL == NULL) && (ID == NULL))
12057 return(-1);
12058 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12059 return(-1);
12060
12061 ctxt = xmlNewParserCtxt();
12062 if (ctxt == NULL) {
12063 return(-1);
12064 }
12065
12066 ctxt->userData = ctxt;
12067 ctxt->_private = ctx->_private;
12068
12069 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12070 if (inputStream == NULL) {
12071 xmlFreeParserCtxt(ctxt);
12072 return(-1);
12073 }
12074
12075 inputPush(ctxt, inputStream);
12076
12077 if ((ctxt->directory == NULL) && (directory == NULL))
12078 directory = xmlParserGetDirectory((char *)URL);
12079 if ((ctxt->directory == NULL) && (directory != NULL))
12080 ctxt->directory = directory;
12081
12082 oldsax = ctxt->sax;
12083 ctxt->sax = ctx->sax;
12084 xmlDetectSAX2(ctxt);
12085 newDoc = xmlNewDoc(BAD_CAST "1.0");
12086 if (newDoc == NULL) {
12087 xmlFreeParserCtxt(ctxt);
12088 return(-1);
12089 }
12090 newDoc->properties = XML_DOC_INTERNAL;
12091 if (ctx->myDoc->dict) {
12092 newDoc->dict = ctx->myDoc->dict;
12093 xmlDictReference(newDoc->dict);
12094 }
12095 if (ctx->myDoc != NULL) {
12096 newDoc->intSubset = ctx->myDoc->intSubset;
12097 newDoc->extSubset = ctx->myDoc->extSubset;
12098 }
12099 if (ctx->myDoc->URL != NULL) {
12100 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12101 }
12102 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12103 if (newRoot == NULL) {
12104 ctxt->sax = oldsax;
12105 xmlFreeParserCtxt(ctxt);
12106 newDoc->intSubset = NULL;
12107 newDoc->extSubset = NULL;
12108 xmlFreeDoc(newDoc);
12109 return(-1);
12110 }
12111 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12112 nodePush(ctxt, newDoc->children);
12113 if (ctx->myDoc == NULL) {
12114 ctxt->myDoc = newDoc;
12115 } else {
12116 ctxt->myDoc = ctx->myDoc;
12117 newDoc->children->doc = ctx->myDoc;
12118 }
12119
12120 /*
12121 * Get the 4 first bytes and decode the charset
12122 * if enc != XML_CHAR_ENCODING_NONE
12123 * plug some encoding conversion routines.
12124 */
12125 GROW
12126 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12127 start[0] = RAW;
12128 start[1] = NXT(1);
12129 start[2] = NXT(2);
12130 start[3] = NXT(3);
12131 enc = xmlDetectCharEncoding(start, 4);
12132 if (enc != XML_CHAR_ENCODING_NONE) {
12133 xmlSwitchEncoding(ctxt, enc);
12134 }
12135 }
12136
12137 /*
12138 * Parse a possible text declaration first
12139 */
12140 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12141 xmlParseTextDecl(ctxt);
12142 /*
12143 * An XML-1.0 document can't reference an entity not XML-1.0
12144 */
12145 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12146 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12147 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12148 "Version mismatch between document and entity\n");
12149 }
12150 }
12151
12152 /*
12153 * Doing validity checking on chunk doesn't make sense
12154 */
12155 ctxt->instate = XML_PARSER_CONTENT;
12156 ctxt->validate = ctx->validate;
12157 ctxt->valid = ctx->valid;
12158 ctxt->loadsubset = ctx->loadsubset;
12159 ctxt->depth = ctx->depth + 1;
12160 ctxt->replaceEntities = ctx->replaceEntities;
12161 if (ctxt->validate) {
12162 ctxt->vctxt.error = ctx->vctxt.error;
12163 ctxt->vctxt.warning = ctx->vctxt.warning;
12164 } else {
12165 ctxt->vctxt.error = NULL;
12166 ctxt->vctxt.warning = NULL;
12167 }
12168 ctxt->vctxt.nodeTab = NULL;
12169 ctxt->vctxt.nodeNr = 0;
12170 ctxt->vctxt.nodeMax = 0;
12171 ctxt->vctxt.node = NULL;
12172 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12173 ctxt->dict = ctx->dict;
12174 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12175 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12176 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12177 ctxt->dictNames = ctx->dictNames;
12178 ctxt->attsDefault = ctx->attsDefault;
12179 ctxt->attsSpecial = ctx->attsSpecial;
12180 ctxt->linenumbers = ctx->linenumbers;
12181
12182 xmlParseContent(ctxt);
12183
12184 ctx->validate = ctxt->validate;
12185 ctx->valid = ctxt->valid;
12186 if ((RAW == '<') && (NXT(1) == '/')) {
12187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12188 } else if (RAW != 0) {
12189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12190 }
12191 if (ctxt->node != newDoc->children) {
12192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12193 }
12194
12195 if (!ctxt->wellFormed) {
12196 if (ctxt->errNo == 0)
12197 ret = 1;
12198 else
12199 ret = ctxt->errNo;
12200 } else {
12201 if (lst != NULL) {
12202 xmlNodePtr cur;
12203
12204 /*
12205 * Return the newly created nodeset after unlinking it from
12206 * they pseudo parent.
12207 */
12208 cur = newDoc->children->children;
12209 *lst = cur;
12210 while (cur != NULL) {
12211 cur->parent = NULL;
12212 cur = cur->next;
12213 }
12214 newDoc->children->children = NULL;
12215 }
12216 ret = 0;
12217 }
12218 ctxt->sax = oldsax;
12219 ctxt->dict = NULL;
12220 ctxt->attsDefault = NULL;
12221 ctxt->attsSpecial = NULL;
12222 xmlFreeParserCtxt(ctxt);
12223 newDoc->intSubset = NULL;
12224 newDoc->extSubset = NULL;
12225 xmlFreeDoc(newDoc);
12226
12227 return(ret);
12228 }
12229
12230 /**
12231 * xmlParseExternalEntityPrivate:
12232 * @doc: the document the chunk pertains to
12233 * @oldctxt: the previous parser context if available
12234 * @sax: the SAX handler bloc (possibly NULL)
12235 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12236 * @depth: Used for loop detection, use 0
12237 * @URL: the URL for the entity to load
12238 * @ID: the System ID for the entity to load
12239 * @list: the return value for the set of parsed nodes
12240 *
12241 * Private version of xmlParseExternalEntity()
12242 *
12243 * Returns 0 if the entity is well formed, -1 in case of args problem and
12244 * the parser error code otherwise
12245 */
12246
12247 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12248 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12249 xmlSAXHandlerPtr sax,
12250 void *user_data, int depth, const xmlChar *URL,
12251 const xmlChar *ID, xmlNodePtr *list) {
12252 xmlParserCtxtPtr ctxt;
12253 xmlDocPtr newDoc;
12254 xmlNodePtr newRoot;
12255 xmlSAXHandlerPtr oldsax = NULL;
12256 xmlParserErrors ret = XML_ERR_OK;
12257 xmlChar start[4];
12258 xmlCharEncoding enc;
12259
12260 if (((depth > 40) &&
12261 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12262 (depth > 1024)) {
12263 return(XML_ERR_ENTITY_LOOP);
12264 }
12265
12266 if (list != NULL)
12267 *list = NULL;
12268 if ((URL == NULL) && (ID == NULL))
12269 return(XML_ERR_INTERNAL_ERROR);
12270 if (doc == NULL)
12271 return(XML_ERR_INTERNAL_ERROR);
12272
12273
12274 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
12275 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12276 ctxt->userData = ctxt;
12277 if (oldctxt != NULL) {
12278 ctxt->_private = oldctxt->_private;
12279 ctxt->loadsubset = oldctxt->loadsubset;
12280 ctxt->validate = oldctxt->validate;
12281 ctxt->external = oldctxt->external;
12282 ctxt->record_info = oldctxt->record_info;
12283 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12284 ctxt->node_seq.length = oldctxt->node_seq.length;
12285 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12286 } else {
12287 /*
12288 * Doing validity checking on chunk without context
12289 * doesn't make sense
12290 */
12291 ctxt->_private = NULL;
12292 ctxt->validate = 0;
12293 ctxt->external = 2;
12294 ctxt->loadsubset = 0;
12295 }
12296 if (sax != NULL) {
12297 oldsax = ctxt->sax;
12298 ctxt->sax = sax;
12299 if (user_data != NULL)
12300 ctxt->userData = user_data;
12301 }
12302 xmlDetectSAX2(ctxt);
12303 newDoc = xmlNewDoc(BAD_CAST "1.0");
12304 if (newDoc == NULL) {
12305 ctxt->node_seq.maximum = 0;
12306 ctxt->node_seq.length = 0;
12307 ctxt->node_seq.buffer = NULL;
12308 xmlFreeParserCtxt(ctxt);
12309 return(XML_ERR_INTERNAL_ERROR);
12310 }
12311 newDoc->properties = XML_DOC_INTERNAL;
12312 newDoc->intSubset = doc->intSubset;
12313 newDoc->extSubset = doc->extSubset;
12314 newDoc->dict = doc->dict;
12315 xmlDictReference(newDoc->dict);
12316
12317 if (doc->URL != NULL) {
12318 newDoc->URL = xmlStrdup(doc->URL);
12319 }
12320 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12321 if (newRoot == NULL) {
12322 if (sax != NULL)
12323 ctxt->sax = oldsax;
12324 ctxt->node_seq.maximum = 0;
12325 ctxt->node_seq.length = 0;
12326 ctxt->node_seq.buffer = NULL;
12327 xmlFreeParserCtxt(ctxt);
12328 newDoc->intSubset = NULL;
12329 newDoc->extSubset = NULL;
12330 xmlFreeDoc(newDoc);
12331 return(XML_ERR_INTERNAL_ERROR);
12332 }
12333 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12334 nodePush(ctxt, newDoc->children);
12335 ctxt->myDoc = doc;
12336 newRoot->doc = doc;
12337
12338 /*
12339 * Get the 4 first bytes and decode the charset
12340 * if enc != XML_CHAR_ENCODING_NONE
12341 * plug some encoding conversion routines.
12342 */
12343 GROW;
12344 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12345 start[0] = RAW;
12346 start[1] = NXT(1);
12347 start[2] = NXT(2);
12348 start[3] = NXT(3);
12349 enc = xmlDetectCharEncoding(start, 4);
12350 if (enc != XML_CHAR_ENCODING_NONE) {
12351 xmlSwitchEncoding(ctxt, enc);
12352 }
12353 }
12354
12355 /*
12356 * Parse a possible text declaration first
12357 */
12358 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12359 xmlParseTextDecl(ctxt);
12360 }
12361
12362 ctxt->instate = XML_PARSER_CONTENT;
12363 ctxt->depth = depth;
12364
12365 xmlParseContent(ctxt);
12366
12367 if ((RAW == '<') && (NXT(1) == '/')) {
12368 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12369 } else if (RAW != 0) {
12370 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12371 }
12372 if (ctxt->node != newDoc->children) {
12373 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12374 }
12375
12376 if (!ctxt->wellFormed) {
12377 if (ctxt->errNo == 0)
12378 ret = XML_ERR_INTERNAL_ERROR;
12379 else
12380 ret = (xmlParserErrors)ctxt->errNo;
12381 } else {
12382 if (list != NULL) {
12383 xmlNodePtr cur;
12384
12385 /*
12386 * Return the newly created nodeset after unlinking it from
12387 * they pseudo parent.
12388 */
12389 cur = newDoc->children->children;
12390 *list = cur;
12391 while (cur != NULL) {
12392 cur->parent = NULL;
12393 cur = cur->next;
12394 }
12395 newDoc->children->children = NULL;
12396 }
12397 ret = XML_ERR_OK;
12398 }
12399
12400 /*
12401 * Record in the parent context the number of entities replacement
12402 * done when parsing that reference.
12403 */
12404 oldctxt->nbentities += ctxt->nbentities;
12405 /*
12406 * Also record the size of the entity parsed
12407 */
12408 if (ctxt->input != NULL) {
12409 oldctxt->sizeentities += ctxt->input->consumed;
12410 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12411 }
12412 /*
12413 * And record the last error if any
12414 */
12415 if (ctxt->lastError.code != XML_ERR_OK)
12416 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12417
12418 if (sax != NULL)
12419 ctxt->sax = oldsax;
12420 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12421 oldctxt->node_seq.length = ctxt->node_seq.length;
12422 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12423 ctxt->node_seq.maximum = 0;
12424 ctxt->node_seq.length = 0;
12425 ctxt->node_seq.buffer = NULL;
12426 xmlFreeParserCtxt(ctxt);
12427 newDoc->intSubset = NULL;
12428 newDoc->extSubset = NULL;
12429 xmlFreeDoc(newDoc);
12430
12431 return(ret);
12432 }
12433
12434 #ifdef LIBXML_SAX1_ENABLED
12435 /**
12436 * xmlParseExternalEntity:
12437 * @doc: the document the chunk pertains to
12438 * @sax: the SAX handler bloc (possibly NULL)
12439 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12440 * @depth: Used for loop detection, use 0
12441 * @URL: the URL for the entity to load
12442 * @ID: the System ID for the entity to load
12443 * @lst: the return value for the set of parsed nodes
12444 *
12445 * Parse an external general entity
12446 * An external general parsed entity is well-formed if it matches the
12447 * production labeled extParsedEnt.
12448 *
12449 * [78] extParsedEnt ::= TextDecl? content
12450 *
12451 * Returns 0 if the entity is well formed, -1 in case of args problem and
12452 * the parser error code otherwise
12453 */
12454
12455 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12456 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12457 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12458 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12459 ID, lst));
12460 }
12461
12462 /**
12463 * xmlParseBalancedChunkMemory:
12464 * @doc: the document the chunk pertains to
12465 * @sax: the SAX handler bloc (possibly NULL)
12466 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12467 * @depth: Used for loop detection, use 0
12468 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12469 * @lst: the return value for the set of parsed nodes
12470 *
12471 * Parse a well-balanced chunk of an XML document
12472 * called by the parser
12473 * The allowed sequence for the Well Balanced Chunk is the one defined by
12474 * the content production in the XML grammar:
12475 *
12476 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12477 *
12478 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12479 * the parser error code otherwise
12480 */
12481
12482 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12483 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12484 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12485 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12486 depth, string, lst, 0 );
12487 }
12488 #endif /* LIBXML_SAX1_ENABLED */
12489
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context (dereferenced unconditionally,
 *            must not be NULL)
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context; when NULL the
 *              new parser context itself is used as user data
 * @lst:  the return value for the set of parsed nodes (may be NULL)
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed with a temporary context which borrows the SAX
 * handler, dictionary, options and attribute tables of @oldctxt. The
 * nodes are built below a temporary "pseudoroot" element which, for the
 * duration of the parse, replaces the children of the working document.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, XML_ERR_ENTITY_LOOP
 * if the depth limit of @oldctxt is exceeded, XML_ERR_INTERNAL_ERROR if
 * @string is NULL or a document/node allocation fails,
 * XML_WAR_UNDECLARED_ENTITY if the parser context cannot be created, and
 * the parser error code otherwise
 *
 * The node list is only stored in @lst when the result is XML_ERR_OK;
 * in every other case *@lst is left NULL and the parsed nodes are freed
 * together with the pseudoroot.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;    /* only set when oldctxt has no document */
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;  /* saved children of the working document */
    xmlNodePtr last = NULL;     /* saved last child of the working document */
    int size;
    xmlParserErrors ret = XML_ERR_OK;

    /* nesting guard: 40 levels, or 1024 with XML_PARSE_HUGE */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Swap the dictionary of the new context for the parent's one and
     * redo the lookups of the interned strings in it. ctxt->dict is
     * reset to NULL before every xmlFreeParserCtxt(ctxt) below.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    ctxt->nsParent = oldctxt;

    /*
     * Borrow the parent's SAX block; the original one is put back
     * (ctxt->sax = oldsax) before every xmlFreeParserCtxt(ctxt) below.
     */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document in the parent: build the nodes in a scratch one */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Work directly in the parent's document; remember its current
         * children so they can be put back once parsing is done.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* make the pseudoroot the only child of the document while parsing */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    /* validation is done afterwards, through the parent context (below) */
    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    /* borrowed tables: detached again before ctxt is freed at the end */
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    /* anything left in the input means the chunk was not pure content */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back on the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /* validate top-level elements on behalf of the parent context */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /*
         * Free the pseudoroot (and whatever is still attached below it)
         * and restore the children saved before the parse.
         */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     */
    oldctxt->nbentities += ctxt->nbentities;
    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* detach everything borrowed from oldctxt before freeing ctxt */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}
12673
/**
 * xmlParseInNodeContext:
 * @node:  the context node
 * @data:  the input string
 * @datalen:  the input string length in bytes
 * @options:  a combination of xmlParserOption
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * within the context (DTD, namespaces, etc ...) of the given node.
 *
 * The allowed sequence for the data is a Well Balanced Chunk defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * If @node is not an element or a document, its nearest element or
 * document ancestor is used as the context instead. When the owning
 * document is an HTML document and HTML support is compiled in, the
 * HTML parser is used.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise. XML_ERR_INTERNAL_ERROR is returned for invalid
 * arguments (NULL @lst, @node or @data, negative @datalen, unsupported
 * node type, no usable document) or when built without SAX2, and
 * XML_ERR_NO_MEMORY when the parser context or the marker node cannot
 * be allocated. On any error *@lst is set to NULL (it is left untouched
 * by the early argument checks).
 */
xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
                      int options, xmlNodePtr *lst) {
#ifdef SAX2
    xmlParserCtxtPtr ctxt;
    xmlDocPtr doc = NULL;
    xmlNodePtr fake, cur;
    int nsnr = 0;       /* number of namespaces pushed on ctxt below */

    xmlParserErrors ret = XML_ERR_OK;

    /*
     * check all input parameters, grab the document
     */
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
        return(XML_ERR_INTERNAL_ERROR);
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:
            break;
        default:
            return(XML_ERR_INTERNAL_ERROR);

    }
    /* climb to the nearest element or document node */
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
           (node->type != XML_DOCUMENT_NODE) &&
           (node->type != XML_HTML_DOCUMENT_NODE))
        node = node->parent;
    if (node == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    if (node->type == XML_ELEMENT_NODE)
        doc = node->doc;
    else
        doc = (xmlDocPtr) node;
    if (doc == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    /*
     * allocate a context and set-up everything not related to the
     * node position in the tree
     */
    if (doc->type == XML_DOCUMENT_NODE)
        ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
#ifdef LIBXML_HTML_ENABLED
    else if (doc->type == XML_HTML_DOCUMENT_NODE)
        ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
#endif
    else
        return(XML_ERR_INTERNAL_ERROR);

    if (ctxt == NULL)
        return(XML_ERR_NO_MEMORY);
    /*
     * Append an empty comment to @node as a marker: after the parse, the
     * siblings following it are taken as the result (see below).
     */
    fake = xmlNewComment(NULL);
    if (fake == NULL) {
        xmlFreeParserCtxt(ctxt);
        return(XML_ERR_NO_MEMORY);
    }
    xmlAddChild(node, fake);

    /*
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
     * we must wait until the last moment to free the original one.
     */
    if (doc->dict != NULL) {
        if (ctxt->dict != NULL)
            xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
    } else
        options |= XML_PARSE_NODICT;

    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
    xmlDetectSAX2(ctxt);
    ctxt->myDoc = doc;

    if (node->type == XML_ELEMENT_NODE) {
        nodePush(ctxt, node);
        /*
         * initialize the SAX2 namespaces stack
         */
        /*
         * Walk from @node up through its element ancestors; a prefix is
         * only pushed when xmlGetNamespace() does not find it yet
         * (presumably so that the innermost declaration wins).
         */
        cur = node;
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
            xmlNsPtr ns = cur->nsDef;
            const xmlChar *iprefix, *ihref;

            while (ns != NULL) {
                if (ctxt->dict) {
                    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
                    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
                } else {
                    iprefix = ns->prefix;
                    ihref = ns->href;
                }

                if (xmlGetNamespace(ctxt, iprefix) == NULL) {
                    nsPush(ctxt, iprefix, ihref);
                    nsnr++;
                }
                ns = ns->next;
            }
            cur = cur->parent;
        }
        ctxt->instate = XML_PARSER_CONTENT;
    }

    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }

    /*
     * Note: when HTML support is compiled in, the xmlParseContent() call
     * below is the else branch of the HTML document test.
     */
#ifdef LIBXML_HTML_ENABLED
    if (doc->type == XML_HTML_DOCUMENT_NODE)
        __htmlParseContent(ctxt);
    else
#endif
        xmlParseContent(ctxt);

    /* remove the namespaces pushed above */
    nsPop(ctxt, nsnr);
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back on the context node (or empty) */
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
        ctxt->wellFormed = 0;
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
        ret = XML_ERR_OK;
    }

    /*
     * Return the newly created nodeset after unlinking it from
     * the pseudo sibling.
     */

    /* cut the sibling chain right after the marker comment */
    cur = fake->next;
    fake->next = NULL;
    node->last = fake;

    if (cur != NULL) {
        cur->prev = NULL;
    }

    *lst = cur;

    /* the returned nodes no longer belong to @node */
    while (cur != NULL) {
        cur->parent = NULL;
        cur = cur->next;
    }

    /* the marker is not needed anymore */
    xmlUnlinkNode(fake);
    xmlFreeNode(fake);


    /* on error nothing is returned: drop whatever was built */
    if (ret != XML_ERR_OK) {
        xmlFreeNodeList(*lst);
        *lst = NULL;
    }

    /*
     * The dictionary was borrowed from the document in that case; detach
     * it before the context is freed (presumably so that
     * xmlFreeParserCtxt() does not release it).
     */
    if (doc->dict != NULL)
        ctxt->dict = NULL;
    xmlFreeParserCtxt(ctxt);

    return(ret);
#else /* !SAX2 */
    return(XML_ERR_INTERNAL_ERROR);
#endif
}
12877
12878 #ifdef LIBXML_SAX1_ENABLED
12879 /**
12880 * xmlParseBalancedChunkMemoryRecover:
12881 * @doc: the document the chunk pertains to
12882 * @sax: the SAX handler bloc (possibly NULL)
12883 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12884 * @depth: Used for loop detection, use 0
12885 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12886 * @lst: the return value for the set of parsed nodes
12887 * @recover: return nodes even if the data is broken (use 0)
12888 *
12889 *
12890 * Parse a well-balanced chunk of an XML document
12891 * called by the parser
12892 * The allowed sequence for the Well Balanced Chunk is the one defined by
12893 * the content production in the XML grammar:
12894 *
12895 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12896 *
12897 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12898 * the parser error code otherwise
12899 *
12900 * In case recover is set to 1, the nodelist will not be empty even if
12901 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12902 * some extent.
12903 */
12904 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)12905 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12906 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12907 int recover) {
12908 xmlParserCtxtPtr ctxt;
12909 xmlDocPtr newDoc;
12910 xmlSAXHandlerPtr oldsax = NULL;
12911 xmlNodePtr content, newRoot;
12912 int size;
12913 int ret = 0;
12914
12915 if (depth > 40) {
12916 return(XML_ERR_ENTITY_LOOP);
12917 }
12918
12919
12920 if (lst != NULL)
12921 *lst = NULL;
12922 if (string == NULL)
12923 return(-1);
12924
12925 size = xmlStrlen(string);
12926
12927 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12928 if (ctxt == NULL) return(-1);
12929 ctxt->userData = ctxt;
12930 if (sax != NULL) {
12931 oldsax = ctxt->sax;
12932 ctxt->sax = sax;
12933 if (user_data != NULL)
12934 ctxt->userData = user_data;
12935 }
12936 newDoc = xmlNewDoc(BAD_CAST "1.0");
12937 if (newDoc == NULL) {
12938 xmlFreeParserCtxt(ctxt);
12939 return(-1);
12940 }
12941 newDoc->properties = XML_DOC_INTERNAL;
12942 if ((doc != NULL) && (doc->dict != NULL)) {
12943 xmlDictFree(ctxt->dict);
12944 ctxt->dict = doc->dict;
12945 xmlDictReference(ctxt->dict);
12946 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12947 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12948 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12949 ctxt->dictNames = 1;
12950 } else {
12951 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
12952 }
12953 if (doc != NULL) {
12954 newDoc->intSubset = doc->intSubset;
12955 newDoc->extSubset = doc->extSubset;
12956 }
12957 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12958 if (newRoot == NULL) {
12959 if (sax != NULL)
12960 ctxt->sax = oldsax;
12961 xmlFreeParserCtxt(ctxt);
12962 newDoc->intSubset = NULL;
12963 newDoc->extSubset = NULL;
12964 xmlFreeDoc(newDoc);
12965 return(-1);
12966 }
12967 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12968 nodePush(ctxt, newRoot);
12969 if (doc == NULL) {
12970 ctxt->myDoc = newDoc;
12971 } else {
12972 ctxt->myDoc = newDoc;
12973 newDoc->children->doc = doc;
12974 /* Ensure that doc has XML spec namespace */
12975 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12976 newDoc->oldNs = doc->oldNs;
12977 }
12978 ctxt->instate = XML_PARSER_CONTENT;
12979 ctxt->depth = depth;
12980
12981 /*
12982 * Doing validity checking on chunk doesn't make sense
12983 */
12984 ctxt->validate = 0;
12985 ctxt->loadsubset = 0;
12986 xmlDetectSAX2(ctxt);
12987
12988 if ( doc != NULL ){
12989 content = doc->children;
12990 doc->children = NULL;
12991 xmlParseContent(ctxt);
12992 doc->children = content;
12993 }
12994 else {
12995 xmlParseContent(ctxt);
12996 }
12997 if ((RAW == '<') && (NXT(1) == '/')) {
12998 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12999 } else if (RAW != 0) {
13000 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13001 }
13002 if (ctxt->node != newDoc->children) {
13003 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13004 }
13005
13006 if (!ctxt->wellFormed) {
13007 if (ctxt->errNo == 0)
13008 ret = 1;
13009 else
13010 ret = ctxt->errNo;
13011 } else {
13012 ret = 0;
13013 }
13014
13015 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13016 xmlNodePtr cur;
13017
13018 /*
13019 * Return the newly created nodeset after unlinking it from
13020 * they pseudo parent.
13021 */
13022 cur = newDoc->children->children;
13023 *lst = cur;
13024 while (cur != NULL) {
13025 xmlSetTreeDoc(cur, doc);
13026 cur->parent = NULL;
13027 cur = cur->next;
13028 }
13029 newDoc->children->children = NULL;
13030 }
13031
13032 if (sax != NULL)
13033 ctxt->sax = oldsax;
13034 xmlFreeParserCtxt(ctxt);
13035 newDoc->intSubset = NULL;
13036 newDoc->extSubset = NULL;
13037 newDoc->oldNs = NULL;
13038 xmlFreeDoc(newDoc);
13039
13040 return(ret);
13041 }
13042
13043 /**
13044 * xmlSAXParseEntity:
13045 * @sax: the SAX handler block
13046 * @filename: the filename
13047 *
13048 * parse an XML external entity out of context and build a tree.
13049 * It use the given SAX function block to handle the parsing callback.
13050 * If sax is NULL, fallback to the default DOM tree building routines.
13051 *
13052 * [78] extParsedEnt ::= TextDecl? content
13053 *
13054 * This correspond to a "Well Balanced" chunk
13055 *
13056 * Returns the resulting document tree
13057 */
13058
13059 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13060 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13061 xmlDocPtr ret;
13062 xmlParserCtxtPtr ctxt;
13063
13064 ctxt = xmlCreateFileParserCtxt(filename);
13065 if (ctxt == NULL) {
13066 return(NULL);
13067 }
13068 if (sax != NULL) {
13069 if (ctxt->sax != NULL)
13070 xmlFree(ctxt->sax);
13071 ctxt->sax = sax;
13072 ctxt->userData = NULL;
13073 }
13074
13075 xmlParseExtParsedEnt(ctxt);
13076
13077 if (ctxt->wellFormed)
13078 ret = ctxt->myDoc;
13079 else {
13080 ret = NULL;
13081 xmlFreeDoc(ctxt->myDoc);
13082 ctxt->myDoc = NULL;
13083 }
13084 if (sax != NULL)
13085 ctxt->sax = NULL;
13086 xmlFreeParserCtxt(ctxt);
13087
13088 return(ret);
13089 }
13090
13091 /**
13092 * xmlParseEntity:
13093 * @filename: the filename
13094 *
13095 * parse an XML external entity out of context and build a tree.
13096 *
13097 * [78] extParsedEnt ::= TextDecl? content
13098 *
13099 * This correspond to a "Well Balanced" chunk
13100 *
13101 * Returns the resulting document tree
13102 */
13103
13104 xmlDocPtr
xmlParseEntity(const char * filename)13105 xmlParseEntity(const char *filename) {
13106 return(xmlSAXParseEntity(NULL, filename));
13107 }
13108 #endif /* LIBXML_SAX1_ENABLED */
13109
13110 /**
13111 * xmlCreateEntityParserCtxt:
13112 * @URL: the entity URL
13113 * @ID: the entity PUBLIC ID
13114 * @base: a possible base for the target URI
13115 *
13116 * Create a parser context for an external entity
13117 * Automatic support for ZLIB/Compress compressed document is provided
13118 * by default if found at compile-time.
13119 *
13120 * Returns the new parser context or NULL
13121 */
13122 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13123 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13124 const xmlChar *base) {
13125 xmlParserCtxtPtr ctxt;
13126 xmlParserInputPtr inputStream;
13127 char *directory = NULL;
13128 xmlChar *uri;
13129
13130 ctxt = xmlNewParserCtxt();
13131 if (ctxt == NULL) {
13132 return(NULL);
13133 }
13134
13135 uri = xmlBuildURI(URL, base);
13136
13137 if (uri == NULL) {
13138 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13139 if (inputStream == NULL) {
13140 xmlFreeParserCtxt(ctxt);
13141 return(NULL);
13142 }
13143
13144 inputPush(ctxt, inputStream);
13145
13146 if ((ctxt->directory == NULL) && (directory == NULL))
13147 directory = xmlParserGetDirectory((char *)URL);
13148 if ((ctxt->directory == NULL) && (directory != NULL))
13149 ctxt->directory = directory;
13150 } else {
13151 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13152 if (inputStream == NULL) {
13153 xmlFree(uri);
13154 xmlFreeParserCtxt(ctxt);
13155 return(NULL);
13156 }
13157
13158 inputPush(ctxt, inputStream);
13159
13160 if ((ctxt->directory == NULL) && (directory == NULL))
13161 directory = xmlParserGetDirectory((char *)uri);
13162 if ((ctxt->directory == NULL) && (directory != NULL))
13163 ctxt->directory = directory;
13164 xmlFree(uri);
13165 }
13166 return(ctxt);
13167 }
13168
13169 /************************************************************************
13170 * *
13171 * Front ends when parsing from a file *
13172 * *
13173 ************************************************************************/
13174
13175 /**
13176 * xmlCreateURLParserCtxt:
13177 * @filename: the filename or URL
13178 * @options: a combination of xmlParserOption
13179 *
13180 * Create a parser context for a file or URL content.
13181 * Automatic support for ZLIB/Compress compressed document is provided
13182 * by default if found at compile-time and for file accesses
13183 *
13184 * Returns the new parser context or NULL
13185 */
13186 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13187 xmlCreateURLParserCtxt(const char *filename, int options)
13188 {
13189 xmlParserCtxtPtr ctxt;
13190 xmlParserInputPtr inputStream;
13191 char *directory = NULL;
13192
13193 ctxt = xmlNewParserCtxt();
13194 if (ctxt == NULL) {
13195 xmlErrMemory(NULL, "cannot allocate parser context");
13196 return(NULL);
13197 }
13198
13199 if (options)
13200 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13201 ctxt->linenumbers = 1;
13202
13203 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13204 if (inputStream == NULL) {
13205 xmlFreeParserCtxt(ctxt);
13206 return(NULL);
13207 }
13208
13209 inputPush(ctxt, inputStream);
13210 if ((ctxt->directory == NULL) && (directory == NULL))
13211 directory = xmlParserGetDirectory(filename);
13212 if ((ctxt->directory == NULL) && (directory != NULL))
13213 ctxt->directory = directory;
13214
13215 return(ctxt);
13216 }
13217
13218 /**
13219 * xmlCreateFileParserCtxt:
13220 * @filename: the filename
13221 *
13222 * Create a parser context for a file content.
13223 * Automatic support for ZLIB/Compress compressed document is provided
13224 * by default if found at compile-time.
13225 *
13226 * Returns the new parser context or NULL
13227 */
13228 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13229 xmlCreateFileParserCtxt(const char *filename)
13230 {
13231 return(xmlCreateURLParserCtxt(filename, 0));
13232 }
13233
13234 #ifdef LIBXML_SAX1_ENABLED
13235 /**
13236 * xmlSAXParseFileWithData:
13237 * @sax: the SAX handler block
13238 * @filename: the filename
13239 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13240 * documents
13241 * @data: the userdata
13242 *
13243 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13244 * compressed document is provided by default if found at compile-time.
13245 * It use the given SAX function block to handle the parsing callback.
13246 * If sax is NULL, fallback to the default DOM tree building routines.
13247 *
13248 * User data (void *) is stored within the parser context in the
13249 * context's _private member, so it is available nearly everywhere in libxml
13250 *
13251 * Returns the resulting document tree
13252 */
13253
13254 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13255 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13256 int recovery, void *data) {
13257 xmlDocPtr ret;
13258 xmlParserCtxtPtr ctxt;
13259
13260 xmlInitParser();
13261
13262 ctxt = xmlCreateFileParserCtxt(filename);
13263 if (ctxt == NULL) {
13264 return(NULL);
13265 }
13266 if (sax != NULL) {
13267 if (ctxt->sax != NULL)
13268 xmlFree(ctxt->sax);
13269 ctxt->sax = sax;
13270 }
13271 xmlDetectSAX2(ctxt);
13272 if (data!=NULL) {
13273 ctxt->_private = data;
13274 }
13275
13276 if (ctxt->directory == NULL)
13277 ctxt->directory = xmlParserGetDirectory(filename);
13278
13279 ctxt->recovery = recovery;
13280
13281 xmlParseDocument(ctxt);
13282
13283 if ((ctxt->wellFormed) || recovery) {
13284 ret = ctxt->myDoc;
13285 if (ret != NULL) {
13286 if (ctxt->input->buf->compressed > 0)
13287 ret->compression = 9;
13288 else
13289 ret->compression = ctxt->input->buf->compressed;
13290 }
13291 }
13292 else {
13293 ret = NULL;
13294 xmlFreeDoc(ctxt->myDoc);
13295 ctxt->myDoc = NULL;
13296 }
13297 if (sax != NULL)
13298 ctxt->sax = NULL;
13299 xmlFreeParserCtxt(ctxt);
13300
13301 return(ret);
13302 }
13303
13304 /**
13305 * xmlSAXParseFile:
13306 * @sax: the SAX handler block
13307 * @filename: the filename
13308 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13309 * documents
13310 *
13311 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13312 * compressed document is provided by default if found at compile-time.
13313 * It use the given SAX function block to handle the parsing callback.
13314 * If sax is NULL, fallback to the default DOM tree building routines.
13315 *
13316 * Returns the resulting document tree
13317 */
13318
13319 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13320 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13321 int recovery) {
13322 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13323 }
13324
13325 /**
13326 * xmlRecoverDoc:
13327 * @cur: a pointer to an array of xmlChar
13328 *
13329 * parse an XML in-memory document and build a tree.
13330 * In the case the document is not Well Formed, a attempt to build a
13331 * tree is tried anyway
13332 *
13333 * Returns the resulting document tree or NULL in case of failure
13334 */
13335
13336 xmlDocPtr
xmlRecoverDoc(xmlChar * cur)13337 xmlRecoverDoc(xmlChar *cur) {
13338 return(xmlSAXParseDoc(NULL, cur, 1));
13339 }
13340
13341 /**
13342 * xmlParseFile:
13343 * @filename: the filename
13344 *
13345 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13346 * compressed document is provided by default if found at compile-time.
13347 *
13348 * Returns the resulting document tree if the file was wellformed,
13349 * NULL otherwise.
13350 */
13351
13352 xmlDocPtr
xmlParseFile(const char * filename)13353 xmlParseFile(const char *filename) {
13354 return(xmlSAXParseFile(NULL, filename, 0));
13355 }
13356
13357 /**
13358 * xmlRecoverFile:
13359 * @filename: the filename
13360 *
13361 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13362 * compressed document is provided by default if found at compile-time.
13363 * In the case the document is not Well Formed, it attempts to build
13364 * a tree anyway
13365 *
13366 * Returns the resulting document tree or NULL in case of failure
13367 */
13368
13369 xmlDocPtr
xmlRecoverFile(const char * filename)13370 xmlRecoverFile(const char *filename) {
13371 return(xmlSAXParseFile(NULL, filename, 1));
13372 }
13373
13374
13375 /**
13376 * xmlSetupParserForBuffer:
13377 * @ctxt: an XML parser context
13378 * @buffer: a xmlChar * buffer
13379 * @filename: a file name
13380 *
13381 * Setup the parser context to parse a new buffer; Clears any prior
13382 * contents from the parser context. The buffer parameter must not be
13383 * NULL, but the filename parameter can be
13384 */
13385 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13386 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13387 const char* filename)
13388 {
13389 xmlParserInputPtr input;
13390
13391 if ((ctxt == NULL) || (buffer == NULL))
13392 return;
13393
13394 input = xmlNewInputStream(ctxt);
13395 if (input == NULL) {
13396 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13397 xmlClearParserCtxt(ctxt);
13398 return;
13399 }
13400
13401 xmlClearParserCtxt(ctxt);
13402 if (filename != NULL)
13403 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13404 input->base = buffer;
13405 input->cur = buffer;
13406 input->end = &buffer[xmlStrlen(buffer)];
13407 inputPush(ctxt, input);
13408 }
13409
13410 /**
13411 * xmlSAXUserParseFile:
13412 * @sax: a SAX handler
13413 * @user_data: The user data returned on SAX callbacks
13414 * @filename: a file name
13415 *
13416 * parse an XML file and call the given SAX handler routines.
13417 * Automatic support for ZLIB/Compress compressed document is provided
13418 *
13419 * Returns 0 in case of success or a error number otherwise
13420 */
13421 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13422 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13423 const char *filename) {
13424 int ret = 0;
13425 xmlParserCtxtPtr ctxt;
13426
13427 ctxt = xmlCreateFileParserCtxt(filename);
13428 if (ctxt == NULL) return -1;
13429 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13430 xmlFree(ctxt->sax);
13431 ctxt->sax = sax;
13432 xmlDetectSAX2(ctxt);
13433
13434 if (user_data != NULL)
13435 ctxt->userData = user_data;
13436
13437 xmlParseDocument(ctxt);
13438
13439 if (ctxt->wellFormed)
13440 ret = 0;
13441 else {
13442 if (ctxt->errNo != 0)
13443 ret = ctxt->errNo;
13444 else
13445 ret = -1;
13446 }
13447 if (sax != NULL)
13448 ctxt->sax = NULL;
13449 if (ctxt->myDoc != NULL) {
13450 xmlFreeDoc(ctxt->myDoc);
13451 ctxt->myDoc = NULL;
13452 }
13453 xmlFreeParserCtxt(ctxt);
13454
13455 return ret;
13456 }
13457 #endif /* LIBXML_SAX1_ENABLED */
13458
13459 /************************************************************************
13460 * *
13461 * Front ends when parsing from memory *
13462 * *
13463 ************************************************************************/
13464
13465 /**
13466 * xmlCreateMemoryParserCtxt:
13467 * @buffer: a pointer to a char array
13468 * @size: the size of the array
13469 *
13470 * Create a parser context for an XML in-memory document.
13471 *
13472 * Returns the new parser context or NULL
13473 */
13474 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13475 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13476 xmlParserCtxtPtr ctxt;
13477 xmlParserInputPtr input;
13478 xmlParserInputBufferPtr buf;
13479
13480 if (buffer == NULL)
13481 return(NULL);
13482 if (size <= 0)
13483 return(NULL);
13484
13485 ctxt = xmlNewParserCtxt();
13486 if (ctxt == NULL)
13487 return(NULL);
13488
13489 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13490 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13491 if (buf == NULL) {
13492 xmlFreeParserCtxt(ctxt);
13493 return(NULL);
13494 }
13495
13496 input = xmlNewInputStream(ctxt);
13497 if (input == NULL) {
13498 xmlFreeParserInputBuffer(buf);
13499 xmlFreeParserCtxt(ctxt);
13500 return(NULL);
13501 }
13502
13503 input->filename = NULL;
13504 input->buf = buf;
13505 input->base = input->buf->buffer->content;
13506 input->cur = input->buf->buffer->content;
13507 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13508
13509 inputPush(ctxt, input);
13510 return(ctxt);
13511 }
13512
13513 #ifdef LIBXML_SAX1_ENABLED
13514 /**
13515 * xmlSAXParseMemoryWithData:
13516 * @sax: the SAX handler block
13517 * @buffer: an pointer to a char array
13518 * @size: the size of the array
13519 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13520 * documents
13521 * @data: the userdata
13522 *
13523 * parse an XML in-memory block and use the given SAX function block
13524 * to handle the parsing callback. If sax is NULL, fallback to the default
13525 * DOM tree building routines.
13526 *
13527 * User data (void *) is stored within the parser context in the
13528 * context's _private member, so it is available nearly everywhere in libxml
13529 *
13530 * Returns the resulting document tree
13531 */
13532
13533 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13534 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13535 int size, int recovery, void *data) {
13536 xmlDocPtr ret;
13537 xmlParserCtxtPtr ctxt;
13538
13539 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13540 if (ctxt == NULL) return(NULL);
13541 if (sax != NULL) {
13542 if (ctxt->sax != NULL)
13543 xmlFree(ctxt->sax);
13544 ctxt->sax = sax;
13545 }
13546 xmlDetectSAX2(ctxt);
13547 if (data!=NULL) {
13548 ctxt->_private=data;
13549 }
13550
13551 ctxt->recovery = recovery;
13552
13553 xmlParseDocument(ctxt);
13554
13555 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13556 else {
13557 ret = NULL;
13558 xmlFreeDoc(ctxt->myDoc);
13559 ctxt->myDoc = NULL;
13560 }
13561 if (sax != NULL)
13562 ctxt->sax = NULL;
13563 xmlFreeParserCtxt(ctxt);
13564
13565 return(ret);
13566 }
13567
13568 /**
13569 * xmlSAXParseMemory:
13570 * @sax: the SAX handler block
13571 * @buffer: an pointer to a char array
13572 * @size: the size of the array
13573 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13574 * documents
13575 *
13576 * parse an XML in-memory block and use the given SAX function block
13577 * to handle the parsing callback. If sax is NULL, fallback to the default
13578 * DOM tree building routines.
13579 *
13580 * Returns the resulting document tree
13581 */
13582 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13583 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13584 int size, int recovery) {
13585 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13586 }
13587
13588 /**
13589 * xmlParseMemory:
13590 * @buffer: an pointer to a char array
13591 * @size: the size of the array
13592 *
13593 * parse an XML in-memory block and build a tree.
13594 *
13595 * Returns the resulting document tree
13596 */
13597
xmlParseMemory(const char * buffer,int size)13598 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13599 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13600 }
13601
13602 /**
13603 * xmlRecoverMemory:
13604 * @buffer: an pointer to a char array
13605 * @size: the size of the array
13606 *
13607 * parse an XML in-memory block and build a tree.
13608 * In the case the document is not Well Formed, an attempt to
13609 * build a tree is tried anyway
13610 *
13611 * Returns the resulting document tree or NULL in case of error
13612 */
13613
xmlRecoverMemory(const char * buffer,int size)13614 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13615 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13616 }
13617
13618 /**
13619 * xmlSAXUserParseMemory:
13620 * @sax: a SAX handler
13621 * @user_data: The user data returned on SAX callbacks
13622 * @buffer: an in-memory XML document input
13623 * @size: the length of the XML document in bytes
13624 *
13625 * A better SAX parsing routine.
13626 * parse an XML in-memory buffer and call the given SAX handler routines.
13627 *
13628 * Returns 0 in case of success or a error number otherwise
13629 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13630 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13631 const char *buffer, int size) {
13632 int ret = 0;
13633 xmlParserCtxtPtr ctxt;
13634
13635 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13636 if (ctxt == NULL) return -1;
13637 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13638 xmlFree(ctxt->sax);
13639 ctxt->sax = sax;
13640 xmlDetectSAX2(ctxt);
13641
13642 if (user_data != NULL)
13643 ctxt->userData = user_data;
13644
13645 xmlParseDocument(ctxt);
13646
13647 if (ctxt->wellFormed)
13648 ret = 0;
13649 else {
13650 if (ctxt->errNo != 0)
13651 ret = ctxt->errNo;
13652 else
13653 ret = -1;
13654 }
13655 if (sax != NULL)
13656 ctxt->sax = NULL;
13657 if (ctxt->myDoc != NULL) {
13658 xmlFreeDoc(ctxt->myDoc);
13659 ctxt->myDoc = NULL;
13660 }
13661 xmlFreeParserCtxt(ctxt);
13662
13663 return ret;
13664 }
13665 #endif /* LIBXML_SAX1_ENABLED */
13666
13667 /**
13668 * xmlCreateDocParserCtxt:
13669 * @cur: a pointer to an array of xmlChar
13670 *
13671 * Creates a parser context for an XML in-memory document.
13672 *
13673 * Returns the new parser context or NULL
13674 */
13675 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)13676 xmlCreateDocParserCtxt(const xmlChar *cur) {
13677 int len;
13678
13679 if (cur == NULL)
13680 return(NULL);
13681 len = xmlStrlen(cur);
13682 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13683 }
13684
13685 #ifdef LIBXML_SAX1_ENABLED
13686 /**
13687 * xmlSAXParseDoc:
13688 * @sax: the SAX handler block
13689 * @cur: a pointer to an array of xmlChar
13690 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13691 * documents
13692 *
13693 * parse an XML in-memory document and build a tree.
13694 * It use the given SAX function block to handle the parsing callback.
13695 * If sax is NULL, fallback to the default DOM tree building routines.
13696 *
13697 * Returns the resulting document tree
13698 */
13699
13700 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13701 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13702 xmlDocPtr ret;
13703 xmlParserCtxtPtr ctxt;
13704 xmlSAXHandlerPtr oldsax = NULL;
13705
13706 if (cur == NULL) return(NULL);
13707
13708
13709 ctxt = xmlCreateDocParserCtxt(cur);
13710 if (ctxt == NULL) return(NULL);
13711 if (sax != NULL) {
13712 oldsax = ctxt->sax;
13713 ctxt->sax = sax;
13714 ctxt->userData = NULL;
13715 }
13716 xmlDetectSAX2(ctxt);
13717
13718 xmlParseDocument(ctxt);
13719 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13720 else {
13721 ret = NULL;
13722 xmlFreeDoc(ctxt->myDoc);
13723 ctxt->myDoc = NULL;
13724 }
13725 if (sax != NULL)
13726 ctxt->sax = oldsax;
13727 xmlFreeParserCtxt(ctxt);
13728
13729 return(ret);
13730 }
13731
13732 /**
13733 * xmlParseDoc:
13734 * @cur: a pointer to an array of xmlChar
13735 *
13736 * parse an XML in-memory document and build a tree.
13737 *
13738 * Returns the resulting document tree
13739 */
13740
13741 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13742 xmlParseDoc(const xmlChar *cur) {
13743 return(xmlSAXParseDoc(NULL, cur, 0));
13744 }
13745 #endif /* LIBXML_SAX1_ENABLED */
13746
13747 #ifdef LIBXML_LEGACY_ENABLED
13748 /************************************************************************
13749 * *
13750 * Specific function to keep track of entities references *
13751 * and used by the XSLT debugger *
13752 * *
13753 ************************************************************************/
13754
13755 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13756
13757 /**
13758 * xmlAddEntityReference:
13759 * @ent : A valid entity
13760 * @firstNode : A valid first node for children of entity
13761 * @lastNode : A valid last node of children entity
13762 *
13763 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13764 */
13765 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)13766 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13767 xmlNodePtr lastNode)
13768 {
13769 if (xmlEntityRefFunc != NULL) {
13770 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13771 }
13772 }
13773
13774
13775 /**
13776 * xmlSetEntityReferenceFunc:
13777 * @func: A valid function
13778 *
13779 * Set the function to call call back when a xml reference has been made
13780 */
13781 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)13782 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13783 {
13784 xmlEntityRefFunc = func;
13785 }
13786 #endif /* LIBXML_LEGACY_ENABLED */
13787
13788 /************************************************************************
13789 * *
13790 * Miscellaneous *
13791 * *
13792 ************************************************************************/
13793
13794 #ifdef LIBXML_XPATH_ENABLED
13795 #include <libxml/xpath.h>
13796 #endif
13797
/* Declared here so xmlInitParser() can compare it against xmlGenericError. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * Runs the library-wide initialization routines once; later calls return
 * immediately because the xmlParserInitialized flag is already set.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    /* Fast path: initialization has already been done. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Thread-enabled builds take the global init mutex and test the flag
     * again, presumably so that a caller which lost the race does not
     * repeat the initialization.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        /* Install the default generic error handler if none was set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitGlobals();
        xmlInitThreads();
        xmlInitMemory();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark the library as initialized only once everything has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
13843
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 */

void
xmlCleanupParser(void) {
    /* Nothing to do unless xmlInitParser() has set the flag. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() runs the setup again. */
    xmlParserInitialized = 0;
}
13882
13883 /************************************************************************
13884 * *
13885 * New set (2.6.0) of simpler and more flexible APIs *
13886 * *
13887 ************************************************************************/
13888
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. NULL strings
 * are ignored.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement and cannot capture a following else; it must be
 * terminated with a semicolon by the caller.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13900
13901 /**
13902 * xmlCtxtReset:
13903 * @ctxt: an XML parser context
13904 *
13905 * Reset a parser context
13906 */
13907 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13908 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13909 {
13910 xmlParserInputPtr input;
13911 xmlDictPtr dict;
13912
13913 if (ctxt == NULL)
13914 return;
13915
13916 dict = ctxt->dict;
13917
13918 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13919 xmlFreeInputStream(input);
13920 }
13921 ctxt->inputNr = 0;
13922 ctxt->input = NULL;
13923
13924 ctxt->spaceNr = 0;
13925 if (ctxt->spaceTab != NULL) {
13926 ctxt->spaceTab[0] = -1;
13927 ctxt->space = &ctxt->spaceTab[0];
13928 } else {
13929 ctxt->space = NULL;
13930 }
13931
13932
13933 ctxt->nodeNr = 0;
13934 ctxt->node = NULL;
13935
13936 ctxt->nameNr = 0;
13937 ctxt->name = NULL;
13938
13939 DICT_FREE(ctxt->version);
13940 ctxt->version = NULL;
13941 DICT_FREE(ctxt->encoding);
13942 ctxt->encoding = NULL;
13943 DICT_FREE(ctxt->directory);
13944 ctxt->directory = NULL;
13945 DICT_FREE(ctxt->extSubURI);
13946 ctxt->extSubURI = NULL;
13947 DICT_FREE(ctxt->extSubSystem);
13948 ctxt->extSubSystem = NULL;
13949 if (ctxt->myDoc != NULL)
13950 xmlFreeDoc(ctxt->myDoc);
13951 ctxt->myDoc = NULL;
13952
13953 ctxt->standalone = -1;
13954 ctxt->hasExternalSubset = 0;
13955 ctxt->hasPErefs = 0;
13956 ctxt->html = 0;
13957 ctxt->external = 0;
13958 ctxt->instate = XML_PARSER_START;
13959 ctxt->token = 0;
13960
13961 ctxt->wellFormed = 1;
13962 ctxt->nsWellFormed = 1;
13963 ctxt->disableSAX = 0;
13964 ctxt->valid = 1;
13965 #if 0
13966 ctxt->vctxt.userData = ctxt;
13967 ctxt->vctxt.error = xmlParserValidityError;
13968 ctxt->vctxt.warning = xmlParserValidityWarning;
13969 #endif
13970 ctxt->record_info = 0;
13971 ctxt->nbChars = 0;
13972 ctxt->checkIndex = 0;
13973 ctxt->inSubset = 0;
13974 ctxt->errNo = XML_ERR_OK;
13975 ctxt->depth = 0;
13976 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13977 ctxt->catalogs = NULL;
13978 ctxt->nbentities = 0;
13979 ctxt->sizeentities = 0;
13980 xmlInitNodeInfoSeq(&ctxt->node_seq);
13981
13982 if (ctxt->attsDefault != NULL) {
13983 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13984 ctxt->attsDefault = NULL;
13985 }
13986 if (ctxt->attsSpecial != NULL) {
13987 xmlHashFree(ctxt->attsSpecial, NULL);
13988 ctxt->attsSpecial = NULL;
13989 }
13990
13991 #ifdef LIBXML_CATALOG_ENABLED
13992 if (ctxt->catalogs != NULL)
13993 xmlCatalogFreeLocal(ctxt->catalogs);
13994 #endif
13995 if (ctxt->lastError.code != XML_ERR_OK)
13996 xmlResetError(&ctxt->lastError);
13997 }
13998
13999 /**
14000 * xmlCtxtResetPush:
14001 * @ctxt: an XML parser context
14002 * @chunk: a pointer to an array of chars
14003 * @size: number of chars in the array
14004 * @filename: an optional file name or URI
14005 * @encoding: the document encoding, or NULL
14006 *
14007 * Reset a push parser context
14008 *
14009 * Returns 0 in case of success and 1 in case of error
14010 */
14011 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14012 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14013 int size, const char *filename, const char *encoding)
14014 {
14015 xmlParserInputPtr inputStream;
14016 xmlParserInputBufferPtr buf;
14017 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14018
14019 if (ctxt == NULL)
14020 return(1);
14021
14022 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14023 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14024
14025 buf = xmlAllocParserInputBuffer(enc);
14026 if (buf == NULL)
14027 return(1);
14028
14029 if (ctxt == NULL) {
14030 xmlFreeParserInputBuffer(buf);
14031 return(1);
14032 }
14033
14034 xmlCtxtReset(ctxt);
14035
14036 if (ctxt->pushTab == NULL) {
14037 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14038 sizeof(xmlChar *));
14039 if (ctxt->pushTab == NULL) {
14040 xmlErrMemory(ctxt, NULL);
14041 xmlFreeParserInputBuffer(buf);
14042 return(1);
14043 }
14044 }
14045
14046 if (filename == NULL) {
14047 ctxt->directory = NULL;
14048 } else {
14049 ctxt->directory = xmlParserGetDirectory(filename);
14050 }
14051
14052 inputStream = xmlNewInputStream(ctxt);
14053 if (inputStream == NULL) {
14054 xmlFreeParserInputBuffer(buf);
14055 return(1);
14056 }
14057
14058 if (filename == NULL)
14059 inputStream->filename = NULL;
14060 else
14061 inputStream->filename = (char *)
14062 xmlCanonicPath((const xmlChar *) filename);
14063 inputStream->buf = buf;
14064 inputStream->base = inputStream->buf->buffer->content;
14065 inputStream->cur = inputStream->buf->buffer->content;
14066 inputStream->end =
14067 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14068
14069 inputPush(ctxt, inputStream);
14070
14071 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14072 (ctxt->input->buf != NULL)) {
14073 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14074 int cur = ctxt->input->cur - ctxt->input->base;
14075
14076 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14077
14078 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14079 ctxt->input->cur = ctxt->input->base + cur;
14080 ctxt->input->end =
14081 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14082 use];
14083 #ifdef DEBUG_PUSH
14084 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14085 #endif
14086 }
14087
14088 if (encoding != NULL) {
14089 xmlCharEncodingHandlerPtr hdlr;
14090
14091 if (ctxt->encoding != NULL)
14092 xmlFree((xmlChar *) ctxt->encoding);
14093 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14094
14095 hdlr = xmlFindCharEncodingHandler(encoding);
14096 if (hdlr != NULL) {
14097 xmlSwitchToEncoding(ctxt, hdlr);
14098 } else {
14099 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14100 "Unsupported encoding %s\n", BAD_CAST encoding);
14101 }
14102 } else if (enc != XML_CHAR_ENCODING_NONE) {
14103 xmlSwitchEncoding(ctxt, enc);
14104 }
14105
14106 return(0);
14107 }
14108
14109
14110 /**
14111 * xmlCtxtUseOptionsInternal:
14112 * @ctxt: an XML parser context
14113 * @options: a combination of xmlParserOption
14114 * @encoding: the user provided encoding to use
14115 *
14116 * Applies the options to the parser context
14117 *
14118 * Returns 0 in case of success, the set of unknown or unimplemented options
14119 * in case of error.
14120 */
14121 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14122 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14123 {
14124 if (ctxt == NULL)
14125 return(-1);
14126 if (encoding != NULL) {
14127 if (ctxt->encoding != NULL)
14128 xmlFree((xmlChar *) ctxt->encoding);
14129 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14130 }
14131 if (options & XML_PARSE_RECOVER) {
14132 ctxt->recovery = 1;
14133 options -= XML_PARSE_RECOVER;
14134 ctxt->options |= XML_PARSE_RECOVER;
14135 } else
14136 ctxt->recovery = 0;
14137 if (options & XML_PARSE_DTDLOAD) {
14138 ctxt->loadsubset = XML_DETECT_IDS;
14139 options -= XML_PARSE_DTDLOAD;
14140 ctxt->options |= XML_PARSE_DTDLOAD;
14141 } else
14142 ctxt->loadsubset = 0;
14143 if (options & XML_PARSE_DTDATTR) {
14144 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14145 options -= XML_PARSE_DTDATTR;
14146 ctxt->options |= XML_PARSE_DTDATTR;
14147 }
14148 if (options & XML_PARSE_NOENT) {
14149 ctxt->replaceEntities = 1;
14150 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14151 options -= XML_PARSE_NOENT;
14152 ctxt->options |= XML_PARSE_NOENT;
14153 } else
14154 ctxt->replaceEntities = 0;
14155 if (options & XML_PARSE_PEDANTIC) {
14156 ctxt->pedantic = 1;
14157 options -= XML_PARSE_PEDANTIC;
14158 ctxt->options |= XML_PARSE_PEDANTIC;
14159 } else
14160 ctxt->pedantic = 0;
14161 if (options & XML_PARSE_NOBLANKS) {
14162 ctxt->keepBlanks = 0;
14163 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14164 options -= XML_PARSE_NOBLANKS;
14165 ctxt->options |= XML_PARSE_NOBLANKS;
14166 } else
14167 ctxt->keepBlanks = 1;
14168 if (options & XML_PARSE_DTDVALID) {
14169 ctxt->validate = 1;
14170 if (options & XML_PARSE_NOWARNING)
14171 ctxt->vctxt.warning = NULL;
14172 if (options & XML_PARSE_NOERROR)
14173 ctxt->vctxt.error = NULL;
14174 options -= XML_PARSE_DTDVALID;
14175 ctxt->options |= XML_PARSE_DTDVALID;
14176 } else
14177 ctxt->validate = 0;
14178 if (options & XML_PARSE_NOWARNING) {
14179 ctxt->sax->warning = NULL;
14180 options -= XML_PARSE_NOWARNING;
14181 }
14182 if (options & XML_PARSE_NOERROR) {
14183 ctxt->sax->error = NULL;
14184 ctxt->sax->fatalError = NULL;
14185 options -= XML_PARSE_NOERROR;
14186 }
14187 #ifdef LIBXML_SAX1_ENABLED
14188 if (options & XML_PARSE_SAX1) {
14189 ctxt->sax->startElement = xmlSAX2StartElement;
14190 ctxt->sax->endElement = xmlSAX2EndElement;
14191 ctxt->sax->startElementNs = NULL;
14192 ctxt->sax->endElementNs = NULL;
14193 ctxt->sax->initialized = 1;
14194 options -= XML_PARSE_SAX1;
14195 ctxt->options |= XML_PARSE_SAX1;
14196 }
14197 #endif /* LIBXML_SAX1_ENABLED */
14198 if (options & XML_PARSE_NODICT) {
14199 ctxt->dictNames = 0;
14200 options -= XML_PARSE_NODICT;
14201 ctxt->options |= XML_PARSE_NODICT;
14202 } else {
14203 ctxt->dictNames = 1;
14204 }
14205 if (options & XML_PARSE_NOCDATA) {
14206 ctxt->sax->cdataBlock = NULL;
14207 options -= XML_PARSE_NOCDATA;
14208 ctxt->options |= XML_PARSE_NOCDATA;
14209 }
14210 if (options & XML_PARSE_NSCLEAN) {
14211 ctxt->options |= XML_PARSE_NSCLEAN;
14212 options -= XML_PARSE_NSCLEAN;
14213 }
14214 if (options & XML_PARSE_NONET) {
14215 ctxt->options |= XML_PARSE_NONET;
14216 options -= XML_PARSE_NONET;
14217 }
14218 if (options & XML_PARSE_COMPACT) {
14219 ctxt->options |= XML_PARSE_COMPACT;
14220 options -= XML_PARSE_COMPACT;
14221 }
14222 if (options & XML_PARSE_OLD10) {
14223 ctxt->options |= XML_PARSE_OLD10;
14224 options -= XML_PARSE_OLD10;
14225 }
14226 if (options & XML_PARSE_NOBASEFIX) {
14227 ctxt->options |= XML_PARSE_NOBASEFIX;
14228 options -= XML_PARSE_NOBASEFIX;
14229 }
14230 if (options & XML_PARSE_HUGE) {
14231 ctxt->options |= XML_PARSE_HUGE;
14232 options -= XML_PARSE_HUGE;
14233 }
14234 if (options & XML_PARSE_OLDSAX) {
14235 ctxt->options |= XML_PARSE_OLDSAX;
14236 options -= XML_PARSE_OLDSAX;
14237 }
14238 ctxt->linenumbers = 1;
14239 return (options);
14240 }
14241
14242 /**
14243 * xmlCtxtUseOptions:
14244 * @ctxt: an XML parser context
14245 * @options: a combination of xmlParserOption
14246 *
14247 * Applies the options to the parser context
14248 *
14249 * Returns 0 in case of success, the set of unknown or unimplemented options
14250 * in case of error.
14251 */
14252 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14253 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14254 {
14255 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14256 }
14257
14258 /**
14259 * xmlDoRead:
14260 * @ctxt: an XML parser context
14261 * @URL: the base URL to use for the document
14262 * @encoding: the document encoding, or NULL
14263 * @options: a combination of xmlParserOption
14264 * @reuse: keep the context for reuse
14265 *
14266 * Common front-end for the xmlRead functions
14267 *
14268 * Returns the resulting document tree or NULL
14269 */
14270 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14271 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14272 int options, int reuse)
14273 {
14274 xmlDocPtr ret;
14275
14276 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14277 if (encoding != NULL) {
14278 xmlCharEncodingHandlerPtr hdlr;
14279
14280 hdlr = xmlFindCharEncodingHandler(encoding);
14281 if (hdlr != NULL)
14282 xmlSwitchToEncoding(ctxt, hdlr);
14283 }
14284 if ((URL != NULL) && (ctxt->input != NULL) &&
14285 (ctxt->input->filename == NULL))
14286 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14287 xmlParseDocument(ctxt);
14288 if ((ctxt->wellFormed) || ctxt->recovery)
14289 ret = ctxt->myDoc;
14290 else {
14291 ret = NULL;
14292 if (ctxt->myDoc != NULL) {
14293 xmlFreeDoc(ctxt->myDoc);
14294 }
14295 }
14296 ctxt->myDoc = NULL;
14297 if (!reuse) {
14298 xmlFreeParserCtxt(ctxt);
14299 }
14300
14301 return (ret);
14302 }
14303
14304 /**
14305 * xmlReadDoc:
14306 * @cur: a pointer to a zero terminated string
14307 * @URL: the base URL to use for the document
14308 * @encoding: the document encoding, or NULL
14309 * @options: a combination of xmlParserOption
14310 *
14311 * parse an XML in-memory document and build a tree.
14312 *
14313 * Returns the resulting document tree
14314 */
14315 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14316 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14317 {
14318 xmlParserCtxtPtr ctxt;
14319
14320 if (cur == NULL)
14321 return (NULL);
14322
14323 ctxt = xmlCreateDocParserCtxt(cur);
14324 if (ctxt == NULL)
14325 return (NULL);
14326 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14327 }
14328
14329 /**
14330 * xmlReadFile:
14331 * @filename: a file or URL
14332 * @encoding: the document encoding, or NULL
14333 * @options: a combination of xmlParserOption
14334 *
14335 * parse an XML file from the filesystem or the network.
14336 *
14337 * Returns the resulting document tree
14338 */
14339 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14340 xmlReadFile(const char *filename, const char *encoding, int options)
14341 {
14342 xmlParserCtxtPtr ctxt;
14343
14344 ctxt = xmlCreateURLParserCtxt(filename, options);
14345 if (ctxt == NULL)
14346 return (NULL);
14347 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14348 }
14349
14350 /**
14351 * xmlReadMemory:
14352 * @buffer: a pointer to a char array
14353 * @size: the size of the array
14354 * @URL: the base URL to use for the document
14355 * @encoding: the document encoding, or NULL
14356 * @options: a combination of xmlParserOption
14357 *
14358 * parse an XML in-memory document and build a tree.
14359 *
14360 * Returns the resulting document tree
14361 */
14362 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14363 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14364 {
14365 xmlParserCtxtPtr ctxt;
14366
14367 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14368 if (ctxt == NULL)
14369 return (NULL);
14370 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14371 }
14372
14373 /**
14374 * xmlReadFd:
14375 * @fd: an open file descriptor
14376 * @URL: the base URL to use for the document
14377 * @encoding: the document encoding, or NULL
14378 * @options: a combination of xmlParserOption
14379 *
14380 * parse an XML from a file descriptor and build a tree.
14381 * NOTE that the file descriptor will not be closed when the
14382 * reader is closed or reset.
14383 *
14384 * Returns the resulting document tree
14385 */
14386 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14387 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14388 {
14389 xmlParserCtxtPtr ctxt;
14390 xmlParserInputBufferPtr input;
14391 xmlParserInputPtr stream;
14392
14393 if (fd < 0)
14394 return (NULL);
14395
14396 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14397 if (input == NULL)
14398 return (NULL);
14399 input->closecallback = NULL;
14400 ctxt = xmlNewParserCtxt();
14401 if (ctxt == NULL) {
14402 xmlFreeParserInputBuffer(input);
14403 return (NULL);
14404 }
14405 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14406 if (stream == NULL) {
14407 xmlFreeParserInputBuffer(input);
14408 xmlFreeParserCtxt(ctxt);
14409 return (NULL);
14410 }
14411 inputPush(ctxt, stream);
14412 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14413 }
14414
14415 /**
14416 * xmlReadIO:
14417 * @ioread: an I/O read function
14418 * @ioclose: an I/O close function
14419 * @ioctx: an I/O handler
14420 * @URL: the base URL to use for the document
14421 * @encoding: the document encoding, or NULL
14422 * @options: a combination of xmlParserOption
14423 *
14424 * parse an XML document from I/O functions and source and build a tree.
14425 *
14426 * Returns the resulting document tree
14427 */
14428 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14429 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14430 void *ioctx, const char *URL, const char *encoding, int options)
14431 {
14432 xmlParserCtxtPtr ctxt;
14433 xmlParserInputBufferPtr input;
14434 xmlParserInputPtr stream;
14435
14436 if (ioread == NULL)
14437 return (NULL);
14438
14439 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14440 XML_CHAR_ENCODING_NONE);
14441 if (input == NULL)
14442 return (NULL);
14443 ctxt = xmlNewParserCtxt();
14444 if (ctxt == NULL) {
14445 xmlFreeParserInputBuffer(input);
14446 return (NULL);
14447 }
14448 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14449 if (stream == NULL) {
14450 xmlFreeParserInputBuffer(input);
14451 xmlFreeParserCtxt(ctxt);
14452 return (NULL);
14453 }
14454 inputPush(ctxt, stream);
14455 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14456 }
14457
14458 /**
14459 * xmlCtxtReadDoc:
14460 * @ctxt: an XML parser context
14461 * @cur: a pointer to a zero terminated string
14462 * @URL: the base URL to use for the document
14463 * @encoding: the document encoding, or NULL
14464 * @options: a combination of xmlParserOption
14465 *
14466 * parse an XML in-memory document and build a tree.
14467 * This reuses the existing @ctxt parser context
14468 *
14469 * Returns the resulting document tree
14470 */
14471 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14472 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14473 const char *URL, const char *encoding, int options)
14474 {
14475 xmlParserInputPtr stream;
14476
14477 if (cur == NULL)
14478 return (NULL);
14479 if (ctxt == NULL)
14480 return (NULL);
14481
14482 xmlCtxtReset(ctxt);
14483
14484 stream = xmlNewStringInputStream(ctxt, cur);
14485 if (stream == NULL) {
14486 return (NULL);
14487 }
14488 inputPush(ctxt, stream);
14489 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14490 }
14491
14492 /**
14493 * xmlCtxtReadFile:
14494 * @ctxt: an XML parser context
14495 * @filename: a file or URL
14496 * @encoding: the document encoding, or NULL
14497 * @options: a combination of xmlParserOption
14498 *
14499 * parse an XML file from the filesystem or the network.
14500 * This reuses the existing @ctxt parser context
14501 *
14502 * Returns the resulting document tree
14503 */
14504 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14505 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14506 const char *encoding, int options)
14507 {
14508 xmlParserInputPtr stream;
14509
14510 if (filename == NULL)
14511 return (NULL);
14512 if (ctxt == NULL)
14513 return (NULL);
14514
14515 xmlCtxtReset(ctxt);
14516
14517 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14518 if (stream == NULL) {
14519 return (NULL);
14520 }
14521 inputPush(ctxt, stream);
14522 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14523 }
14524
14525 /**
14526 * xmlCtxtReadMemory:
14527 * @ctxt: an XML parser context
14528 * @buffer: a pointer to a char array
14529 * @size: the size of the array
14530 * @URL: the base URL to use for the document
14531 * @encoding: the document encoding, or NULL
14532 * @options: a combination of xmlParserOption
14533 *
14534 * parse an XML in-memory document and build a tree.
14535 * This reuses the existing @ctxt parser context
14536 *
14537 * Returns the resulting document tree
14538 */
14539 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14540 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14541 const char *URL, const char *encoding, int options)
14542 {
14543 xmlParserInputBufferPtr input;
14544 xmlParserInputPtr stream;
14545
14546 if (ctxt == NULL)
14547 return (NULL);
14548 if (buffer == NULL)
14549 return (NULL);
14550
14551 xmlCtxtReset(ctxt);
14552
14553 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14554 if (input == NULL) {
14555 return(NULL);
14556 }
14557
14558 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14559 if (stream == NULL) {
14560 xmlFreeParserInputBuffer(input);
14561 return(NULL);
14562 }
14563
14564 inputPush(ctxt, stream);
14565 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14566 }
14567
14568 /**
14569 * xmlCtxtReadFd:
14570 * @ctxt: an XML parser context
14571 * @fd: an open file descriptor
14572 * @URL: the base URL to use for the document
14573 * @encoding: the document encoding, or NULL
14574 * @options: a combination of xmlParserOption
14575 *
14576 * parse an XML from a file descriptor and build a tree.
14577 * This reuses the existing @ctxt parser context
14578 * NOTE that the file descriptor will not be closed when the
14579 * reader is closed or reset.
14580 *
14581 * Returns the resulting document tree
14582 */
14583 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14584 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14585 const char *URL, const char *encoding, int options)
14586 {
14587 xmlParserInputBufferPtr input;
14588 xmlParserInputPtr stream;
14589
14590 if (fd < 0)
14591 return (NULL);
14592 if (ctxt == NULL)
14593 return (NULL);
14594
14595 xmlCtxtReset(ctxt);
14596
14597
14598 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14599 if (input == NULL)
14600 return (NULL);
14601 input->closecallback = NULL;
14602 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14603 if (stream == NULL) {
14604 xmlFreeParserInputBuffer(input);
14605 return (NULL);
14606 }
14607 inputPush(ctxt, stream);
14608 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14609 }
14610
14611 /**
14612 * xmlCtxtReadIO:
14613 * @ctxt: an XML parser context
14614 * @ioread: an I/O read function
14615 * @ioclose: an I/O close function
14616 * @ioctx: an I/O handler
14617 * @URL: the base URL to use for the document
14618 * @encoding: the document encoding, or NULL
14619 * @options: a combination of xmlParserOption
14620 *
14621 * parse an XML document from I/O functions and source and build a tree.
14622 * This reuses the existing @ctxt parser context
14623 *
14624 * Returns the resulting document tree
14625 */
14626 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14627 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14628 xmlInputCloseCallback ioclose, void *ioctx,
14629 const char *URL,
14630 const char *encoding, int options)
14631 {
14632 xmlParserInputBufferPtr input;
14633 xmlParserInputPtr stream;
14634
14635 if (ioread == NULL)
14636 return (NULL);
14637 if (ctxt == NULL)
14638 return (NULL);
14639
14640 xmlCtxtReset(ctxt);
14641
14642 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14643 XML_CHAR_ENCODING_NONE);
14644 if (input == NULL)
14645 return (NULL);
14646 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14647 if (stream == NULL) {
14648 xmlFreeParserInputBuffer(input);
14649 return (NULL);
14650 }
14651 inputPush(ctxt, stream);
14652 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14653 }
14654
14655 #define bottom_parser
14656 #include "elfgcchack.h"
14657