1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33 #define IN_LIBXML
34 #include "libxml.h"
35
36 #if defined(WIN32) && !defined (__CYGWIN__)
37 #define XML_DIR_SEP '\\'
38 #else
39 #define XML_DIR_SEP '/'
40 #endif
41
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <libxml/xmlmemory.h>
46 #include <libxml/threads.h>
47 #include <libxml/globals.h>
48 #include <libxml/tree.h>
49 #include <libxml/parser.h>
50 #include <libxml/parserInternals.h>
51 #include <libxml/valid.h>
52 #include <libxml/entities.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/encoding.h>
55 #include <libxml/xmlIO.h>
56 #include <libxml/uri.h>
57 #ifdef LIBXML_CATALOG_ENABLED
58 #include <libxml/catalog.h>
59 #endif
60 #ifdef LIBXML_SCHEMAS_ENABLED
61 #include <libxml/xmlschemastypes.h>
62 #include <libxml/relaxng.h>
63 #endif
64 #ifdef HAVE_CTYPE_H
65 #include <ctype.h>
66 #endif
67 #ifdef HAVE_STDLIB_H
68 #include <stdlib.h>
69 #endif
70 #ifdef HAVE_SYS_STAT_H
71 #include <sys/stat.h>
72 #endif
73 #ifdef HAVE_FCNTL_H
74 #include <fcntl.h>
75 #endif
76 #ifdef HAVE_UNISTD_H
77 #include <unistd.h>
78 #endif
79 #ifdef HAVE_ZLIB_H
80 #include <zlib.h>
81 #endif
82 #ifdef HAVE_LZMA_H
83 #include <lzma.h>
84 #endif
85
86 static void
87 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
88
89 static xmlParserCtxtPtr
90 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
91 const xmlChar *base, xmlParserCtxtPtr pctx);
92
93 /************************************************************************
94 * *
95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
96 * *
97 ************************************************************************/
98
99 #define XML_PARSER_BIG_ENTITY 1000
100 #define XML_PARSER_LOT_ENTITY 5000
101
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
108 #define XML_PARSER_NON_LINEAR 10
109
110 /*
111 * xmlParserEntityCheck
112 *
113 * Function to check non-linear entity expansion behaviour
114 * This is here to detect and stop exponential linear entity expansion
115 * This is not a limitation of the parser but a safety
116 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 * parser option.
118 */
119 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long size,xmlEntityPtr ent)120 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
121 xmlEntityPtr ent)
122 {
123 unsigned long consumed = 0;
124
125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
126 return (0);
127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128 return (1);
129 if (size != 0) {
130 /*
131 * Do the check based on the replacement size of the entity
132 */
133 if (size < XML_PARSER_BIG_ENTITY)
134 return(0);
135
136 /*
137 * A limit on the amount of text data reasonably used
138 */
139 if (ctxt->input != NULL) {
140 consumed = ctxt->input->consumed +
141 (ctxt->input->cur - ctxt->input->base);
142 }
143 consumed += ctxt->sizeentities;
144
145 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
147 return (0);
148 } else if (ent != NULL) {
149 /*
150 * use the number of parsed entities in the replacement
151 */
152 size = ent->checked;
153
154 /*
155 * The amount of data parsed counting entities size only once
156 */
157 if (ctxt->input != NULL) {
158 consumed = ctxt->input->consumed +
159 (ctxt->input->cur - ctxt->input->base);
160 }
161 consumed += ctxt->sizeentities;
162
163 /*
164 * Check the density of entities for the amount of data
165 * knowing an entity reference will take at least 3 bytes
166 */
167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168 return (0);
169 } else {
170 /*
171 * strange we got no data for checking just return
172 */
173 return (0);
174 }
175
176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
177 return (1);
178 }
179
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents accepted for
 * processing. It is a safety boundary rather than a limitation of the
 * parser, and it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
189
190
191
192 #define SAX2 1
193 #define XML_PARSER_BIG_BUFFER_SIZE 300
194 #define XML_PARSER_BUFFER_SIZE 100
195 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
196
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL terminated and read-only: both the strings and the
 * array of pointers are const so the table cannot be modified at runtime.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
206
207
208 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
209 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
210 const xmlChar **str);
211
212 static xmlParserErrors
213 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
214 xmlSAXHandlerPtr sax,
215 void *user_data, int depth, const xmlChar *URL,
216 const xmlChar *ID, xmlNodePtr *list);
217
218 static int
219 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
220 const char *encoding);
221 #ifdef LIBXML_LEGACY_ENABLED
222 static void
223 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
224 xmlNodePtr lastNode);
225 #endif /* LIBXML_LEGACY_ENABLED */
226
227 static xmlParserErrors
228 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
229 const xmlChar *string, void *user_data, xmlNodePtr *lst);
230
231 static int
232 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
233
234 /************************************************************************
235 * *
236 * Some factorized error routines *
237 * *
238 ************************************************************************/
239
240 /**
241 * xmlErrAttributeDup:
242 * @ctxt: an XML parser context
243 * @prefix: the attribute prefix
244 * @localname: the attribute localname
245 *
246 * Handle a redefinition of attribute error
247 */
248 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)249 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250 const xmlChar * localname)
251 {
252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253 (ctxt->instate == XML_PARSER_EOF))
254 return;
255 if (ctxt != NULL)
256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
257
258 if (prefix == NULL)
259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
261 (const char *) localname, NULL, NULL, 0, 0,
262 "Attribute %s redefined\n", localname);
263 else
264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
266 (const char *) prefix, (const char *) localname,
267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268 localname);
269 if (ctxt != NULL) {
270 ctxt->wellFormed = 0;
271 if (ctxt->recovery == 0)
272 ctxt->disableSAX = 1;
273 }
274 }
275
276 /**
277 * xmlFatalErr:
278 * @ctxt: an XML parser context
279 * @error: the error number
280 * @extra: extra information string
281 *
282 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283 */
284 static void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)285 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
286 {
287 const char *errmsg;
288
289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290 (ctxt->instate == XML_PARSER_EOF))
291 return;
292 switch (error) {
293 case XML_ERR_INVALID_HEX_CHARREF:
294 errmsg = "CharRef: invalid hexadecimal value\n";
295 break;
296 case XML_ERR_INVALID_DEC_CHARREF:
297 errmsg = "CharRef: invalid decimal value\n";
298 break;
299 case XML_ERR_INVALID_CHARREF:
300 errmsg = "CharRef: invalid value\n";
301 break;
302 case XML_ERR_INTERNAL_ERROR:
303 errmsg = "internal error";
304 break;
305 case XML_ERR_PEREF_AT_EOF:
306 errmsg = "PEReference at end of document\n";
307 break;
308 case XML_ERR_PEREF_IN_PROLOG:
309 errmsg = "PEReference in prolog\n";
310 break;
311 case XML_ERR_PEREF_IN_EPILOG:
312 errmsg = "PEReference in epilog\n";
313 break;
314 case XML_ERR_PEREF_NO_NAME:
315 errmsg = "PEReference: no name\n";
316 break;
317 case XML_ERR_PEREF_SEMICOL_MISSING:
318 errmsg = "PEReference: expecting ';'\n";
319 break;
320 case XML_ERR_ENTITY_LOOP:
321 errmsg = "Detected an entity reference loop\n";
322 break;
323 case XML_ERR_ENTITY_NOT_STARTED:
324 errmsg = "EntityValue: \" or ' expected\n";
325 break;
326 case XML_ERR_ENTITY_PE_INTERNAL:
327 errmsg = "PEReferences forbidden in internal subset\n";
328 break;
329 case XML_ERR_ENTITY_NOT_FINISHED:
330 errmsg = "EntityValue: \" or ' expected\n";
331 break;
332 case XML_ERR_ATTRIBUTE_NOT_STARTED:
333 errmsg = "AttValue: \" or ' expected\n";
334 break;
335 case XML_ERR_LT_IN_ATTRIBUTE:
336 errmsg = "Unescaped '<' not allowed in attributes values\n";
337 break;
338 case XML_ERR_LITERAL_NOT_STARTED:
339 errmsg = "SystemLiteral \" or ' expected\n";
340 break;
341 case XML_ERR_LITERAL_NOT_FINISHED:
342 errmsg = "Unfinished System or Public ID \" or ' expected\n";
343 break;
344 case XML_ERR_MISPLACED_CDATA_END:
345 errmsg = "Sequence ']]>' not allowed in content\n";
346 break;
347 case XML_ERR_URI_REQUIRED:
348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349 break;
350 case XML_ERR_PUBID_REQUIRED:
351 errmsg = "PUBLIC, the Public Identifier is missing\n";
352 break;
353 case XML_ERR_HYPHEN_IN_COMMENT:
354 errmsg = "Comment must not contain '--' (double-hyphen)\n";
355 break;
356 case XML_ERR_PI_NOT_STARTED:
357 errmsg = "xmlParsePI : no target name\n";
358 break;
359 case XML_ERR_RESERVED_XML_NAME:
360 errmsg = "Invalid PI name\n";
361 break;
362 case XML_ERR_NOTATION_NOT_STARTED:
363 errmsg = "NOTATION: Name expected here\n";
364 break;
365 case XML_ERR_NOTATION_NOT_FINISHED:
366 errmsg = "'>' required to close NOTATION declaration\n";
367 break;
368 case XML_ERR_VALUE_REQUIRED:
369 errmsg = "Entity value required\n";
370 break;
371 case XML_ERR_URI_FRAGMENT:
372 errmsg = "Fragment not allowed";
373 break;
374 case XML_ERR_ATTLIST_NOT_STARTED:
375 errmsg = "'(' required to start ATTLIST enumeration\n";
376 break;
377 case XML_ERR_NMTOKEN_REQUIRED:
378 errmsg = "NmToken expected in ATTLIST enumeration\n";
379 break;
380 case XML_ERR_ATTLIST_NOT_FINISHED:
381 errmsg = "')' required to finish ATTLIST enumeration\n";
382 break;
383 case XML_ERR_MIXED_NOT_STARTED:
384 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385 break;
386 case XML_ERR_PCDATA_REQUIRED:
387 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388 break;
389 case XML_ERR_ELEMCONTENT_NOT_STARTED:
390 errmsg = "ContentDecl : Name or '(' expected\n";
391 break;
392 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
393 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394 break;
395 case XML_ERR_PEREF_IN_INT_SUBSET:
396 errmsg =
397 "PEReference: forbidden within markup decl in internal subset\n";
398 break;
399 case XML_ERR_GT_REQUIRED:
400 errmsg = "expected '>'\n";
401 break;
402 case XML_ERR_CONDSEC_INVALID:
403 errmsg = "XML conditional section '[' expected\n";
404 break;
405 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
406 errmsg = "Content error in the external subset\n";
407 break;
408 case XML_ERR_CONDSEC_INVALID_KEYWORD:
409 errmsg =
410 "conditional section INCLUDE or IGNORE keyword expected\n";
411 break;
412 case XML_ERR_CONDSEC_NOT_FINISHED:
413 errmsg = "XML conditional section not closed\n";
414 break;
415 case XML_ERR_XMLDECL_NOT_STARTED:
416 errmsg = "Text declaration '<?xml' required\n";
417 break;
418 case XML_ERR_XMLDECL_NOT_FINISHED:
419 errmsg = "parsing XML declaration: '?>' expected\n";
420 break;
421 case XML_ERR_EXT_ENTITY_STANDALONE:
422 errmsg = "external parsed entities cannot be standalone\n";
423 break;
424 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
425 errmsg = "EntityRef: expecting ';'\n";
426 break;
427 case XML_ERR_DOCTYPE_NOT_FINISHED:
428 errmsg = "DOCTYPE improperly terminated\n";
429 break;
430 case XML_ERR_LTSLASH_REQUIRED:
431 errmsg = "EndTag: '</' not found\n";
432 break;
433 case XML_ERR_EQUAL_REQUIRED:
434 errmsg = "expected '='\n";
435 break;
436 case XML_ERR_STRING_NOT_CLOSED:
437 errmsg = "String not closed expecting \" or '\n";
438 break;
439 case XML_ERR_STRING_NOT_STARTED:
440 errmsg = "String not started expecting ' or \"\n";
441 break;
442 case XML_ERR_ENCODING_NAME:
443 errmsg = "Invalid XML encoding name\n";
444 break;
445 case XML_ERR_STANDALONE_VALUE:
446 errmsg = "standalone accepts only 'yes' or 'no'\n";
447 break;
448 case XML_ERR_DOCUMENT_EMPTY:
449 errmsg = "Document is empty\n";
450 break;
451 case XML_ERR_DOCUMENT_END:
452 errmsg = "Extra content at the end of the document\n";
453 break;
454 case XML_ERR_NOT_WELL_BALANCED:
455 errmsg = "chunk is not well balanced\n";
456 break;
457 case XML_ERR_EXTRA_CONTENT:
458 errmsg = "extra content at the end of well balanced chunk\n";
459 break;
460 case XML_ERR_VERSION_MISSING:
461 errmsg = "Malformed declaration expecting version\n";
462 break;
463 #if 0
464 case:
465 errmsg = "\n";
466 break;
467 #endif
468 default:
469 errmsg = "Unregistered error message\n";
470 }
471 if (ctxt != NULL)
472 ctxt->errNo = error;
473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475 info);
476 if (ctxt != NULL) {
477 ctxt->wellFormed = 0;
478 if (ctxt->recovery == 0)
479 ctxt->disableSAX = 1;
480 }
481 }
482
483 /**
484 * xmlFatalErrMsg:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491 static void
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)492 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg)
494 {
495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
498 if (ctxt != NULL)
499 ctxt->errNo = error;
500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
502 if (ctxt != NULL) {
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506 }
507 }
508
509 /**
510 * xmlWarningMsg:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @str1: extra data
515 * @str2: extra data
516 *
517 * Handle a warning.
518 */
519 static void
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)520 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521 const char *msg, const xmlChar *str1, const xmlChar *str2)
522 {
523 xmlStructuredErrorFunc schannel = NULL;
524
525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526 (ctxt->instate == XML_PARSER_EOF))
527 return;
528 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529 (ctxt->sax->initialized == XML_SAX2_MAGIC))
530 schannel = ctxt->sax->serror;
531 if (ctxt != NULL) {
532 __xmlRaiseError(schannel,
533 (ctxt->sax) ? ctxt->sax->warning : NULL,
534 ctxt->userData,
535 ctxt, NULL, XML_FROM_PARSER, error,
536 XML_ERR_WARNING, NULL, 0,
537 (const char *) str1, (const char *) str2, NULL, 0, 0,
538 msg, (const char *) str1, (const char *) str2);
539 } else {
540 __xmlRaiseError(schannel, NULL, NULL,
541 ctxt, NULL, XML_FROM_PARSER, error,
542 XML_ERR_WARNING, NULL, 0,
543 (const char *) str1, (const char *) str2, NULL, 0, 0,
544 msg, (const char *) str1, (const char *) str2);
545 }
546 }
547
548 /**
549 * xmlValidityError:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 *
555 * Handle a validity error.
556 */
557 static void
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)558 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
559 const char *msg, const xmlChar *str1, const xmlChar *str2)
560 {
561 xmlStructuredErrorFunc schannel = NULL;
562
563 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564 (ctxt->instate == XML_PARSER_EOF))
565 return;
566 if (ctxt != NULL) {
567 ctxt->errNo = error;
568 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 schannel = ctxt->sax->serror;
570 }
571 if (ctxt != NULL) {
572 __xmlRaiseError(schannel,
573 ctxt->vctxt.error, ctxt->vctxt.userData,
574 ctxt, NULL, XML_FROM_DTD, error,
575 XML_ERR_ERROR, NULL, 0, (const char *) str1,
576 (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
578 ctxt->valid = 0;
579 } else {
580 __xmlRaiseError(schannel, NULL, NULL,
581 ctxt, NULL, XML_FROM_DTD, error,
582 XML_ERR_ERROR, NULL, 0, (const char *) str1,
583 (const char *) str2, NULL, 0, 0,
584 msg, (const char *) str1, (const char *) str2);
585 }
586 }
587
588 /**
589 * xmlFatalErrMsgInt:
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the error message
593 * @val: an integer value
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597 static void
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)598 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599 const char *msg, int val)
600 {
601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602 (ctxt->instate == XML_PARSER_EOF))
603 return;
604 if (ctxt != NULL)
605 ctxt->errNo = error;
606 __xmlRaiseError(NULL, NULL, NULL,
607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
609 if (ctxt != NULL) {
610 ctxt->wellFormed = 0;
611 if (ctxt->recovery == 0)
612 ctxt->disableSAX = 1;
613 }
614 }
615
616 /**
617 * xmlFatalErrMsgStrIntStr:
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the error message
621 * @str1: an string info
622 * @val: an integer value
623 * @str2: an string info
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627 static void
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)628 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg, const xmlChar *str1, int val,
630 const xmlChar *str2)
631 {
632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
634 return;
635 if (ctxt != NULL)
636 ctxt->errNo = error;
637 __xmlRaiseError(NULL, NULL, NULL,
638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639 NULL, 0, (const char *) str1, (const char *) str2,
640 NULL, val, 0, msg, str1, val, str2);
641 if (ctxt != NULL) {
642 ctxt->wellFormed = 0;
643 if (ctxt->recovery == 0)
644 ctxt->disableSAX = 1;
645 }
646 }
647
648 /**
649 * xmlFatalErrMsgStr:
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
653 * @val: a string value
654 *
655 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
656 */
657 static void
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)658 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659 const char *msg, const xmlChar * val)
660 {
661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662 (ctxt->instate == XML_PARSER_EOF))
663 return;
664 if (ctxt != NULL)
665 ctxt->errNo = error;
666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
667 XML_FROM_PARSER, error, XML_ERR_FATAL,
668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
669 val);
670 if (ctxt != NULL) {
671 ctxt->wellFormed = 0;
672 if (ctxt->recovery == 0)
673 ctxt->disableSAX = 1;
674 }
675 }
676
677 /**
678 * xmlErrMsgStr:
679 * @ctxt: an XML parser context
680 * @error: the error number
681 * @msg: the error message
682 * @val: a string value
683 *
684 * Handle a non fatal parser error
685 */
686 static void
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)687 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688 const char *msg, const xmlChar * val)
689 {
690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691 (ctxt->instate == XML_PARSER_EOF))
692 return;
693 if (ctxt != NULL)
694 ctxt->errNo = error;
695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
696 XML_FROM_PARSER, error, XML_ERR_ERROR,
697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
698 val);
699 }
700
701 /**
702 * xmlNsErr:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the message
706 * @info1: extra information string
707 * @info2: extra information string
708 *
709 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
710 */
711 static void
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)712 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
713 const char *msg,
714 const xmlChar * info1, const xmlChar * info2,
715 const xmlChar * info3)
716 {
717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718 (ctxt->instate == XML_PARSER_EOF))
719 return;
720 if (ctxt != NULL)
721 ctxt->errNo = error;
722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
723 XML_ERR_ERROR, NULL, 0, (const char *) info1,
724 (const char *) info2, (const char *) info3, 0, 0, msg,
725 info1, info2, info3);
726 if (ctxt != NULL)
727 ctxt->nsWellFormed = 0;
728 }
729
730 /**
731 * xmlNsWarn
732 * @ctxt: an XML parser context
733 * @error: the error number
734 * @msg: the message
735 * @info1: extra information string
736 * @info2: extra information string
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
740 static void
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)741 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742 const char *msg,
743 const xmlChar * info1, const xmlChar * info2,
744 const xmlChar * info3)
745 {
746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747 (ctxt->instate == XML_PARSER_EOF))
748 return;
749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750 XML_ERR_WARNING, NULL, 0, (const char *) info1,
751 (const char *) info2, (const char *) info3, 0, 0, msg,
752 info1, info2, info3);
753 }
754
755 /************************************************************************
756 * *
757 * Library wide options *
758 * *
759 ************************************************************************/
760
761 /**
762 * xmlHasFeature:
763 * @feature: the feature to be examined
764 *
765 * Examines if the library has been compiled with a given feature.
766 *
767 * Returns a non-zero value if the feature exist, otherwise zero.
768 * Returns zero (0) if the feature does not exist or an unknown
769 * unknown feature is requested, non-zero otherwise.
770 */
771 int
xmlHasFeature(xmlFeature feature)772 xmlHasFeature(xmlFeature feature)
773 {
774 switch (feature) {
775 case XML_WITH_THREAD:
776 #ifdef LIBXML_THREAD_ENABLED
777 return(1);
778 #else
779 return(0);
780 #endif
781 case XML_WITH_TREE:
782 #ifdef LIBXML_TREE_ENABLED
783 return(1);
784 #else
785 return(0);
786 #endif
787 case XML_WITH_OUTPUT:
788 #ifdef LIBXML_OUTPUT_ENABLED
789 return(1);
790 #else
791 return(0);
792 #endif
793 case XML_WITH_PUSH:
794 #ifdef LIBXML_PUSH_ENABLED
795 return(1);
796 #else
797 return(0);
798 #endif
799 case XML_WITH_READER:
800 #ifdef LIBXML_READER_ENABLED
801 return(1);
802 #else
803 return(0);
804 #endif
805 case XML_WITH_PATTERN:
806 #ifdef LIBXML_PATTERN_ENABLED
807 return(1);
808 #else
809 return(0);
810 #endif
811 case XML_WITH_WRITER:
812 #ifdef LIBXML_WRITER_ENABLED
813 return(1);
814 #else
815 return(0);
816 #endif
817 case XML_WITH_SAX1:
818 #ifdef LIBXML_SAX1_ENABLED
819 return(1);
820 #else
821 return(0);
822 #endif
823 case XML_WITH_FTP:
824 #ifdef LIBXML_FTP_ENABLED
825 return(1);
826 #else
827 return(0);
828 #endif
829 case XML_WITH_HTTP:
830 #ifdef LIBXML_HTTP_ENABLED
831 return(1);
832 #else
833 return(0);
834 #endif
835 case XML_WITH_VALID:
836 #ifdef LIBXML_VALID_ENABLED
837 return(1);
838 #else
839 return(0);
840 #endif
841 case XML_WITH_HTML:
842 #ifdef LIBXML_HTML_ENABLED
843 return(1);
844 #else
845 return(0);
846 #endif
847 case XML_WITH_LEGACY:
848 #ifdef LIBXML_LEGACY_ENABLED
849 return(1);
850 #else
851 return(0);
852 #endif
853 case XML_WITH_C14N:
854 #ifdef LIBXML_C14N_ENABLED
855 return(1);
856 #else
857 return(0);
858 #endif
859 case XML_WITH_CATALOG:
860 #ifdef LIBXML_CATALOG_ENABLED
861 return(1);
862 #else
863 return(0);
864 #endif
865 case XML_WITH_XPATH:
866 #ifdef LIBXML_XPATH_ENABLED
867 return(1);
868 #else
869 return(0);
870 #endif
871 case XML_WITH_XPTR:
872 #ifdef LIBXML_XPTR_ENABLED
873 return(1);
874 #else
875 return(0);
876 #endif
877 case XML_WITH_XINCLUDE:
878 #ifdef LIBXML_XINCLUDE_ENABLED
879 return(1);
880 #else
881 return(0);
882 #endif
883 case XML_WITH_ICONV:
884 #ifdef LIBXML_ICONV_ENABLED
885 return(1);
886 #else
887 return(0);
888 #endif
889 case XML_WITH_ISO8859X:
890 #ifdef LIBXML_ISO8859X_ENABLED
891 return(1);
892 #else
893 return(0);
894 #endif
895 case XML_WITH_UNICODE:
896 #ifdef LIBXML_UNICODE_ENABLED
897 return(1);
898 #else
899 return(0);
900 #endif
901 case XML_WITH_REGEXP:
902 #ifdef LIBXML_REGEXP_ENABLED
903 return(1);
904 #else
905 return(0);
906 #endif
907 case XML_WITH_AUTOMATA:
908 #ifdef LIBXML_AUTOMATA_ENABLED
909 return(1);
910 #else
911 return(0);
912 #endif
913 case XML_WITH_EXPR:
914 #ifdef LIBXML_EXPR_ENABLED
915 return(1);
916 #else
917 return(0);
918 #endif
919 case XML_WITH_SCHEMAS:
920 #ifdef LIBXML_SCHEMAS_ENABLED
921 return(1);
922 #else
923 return(0);
924 #endif
925 case XML_WITH_SCHEMATRON:
926 #ifdef LIBXML_SCHEMATRON_ENABLED
927 return(1);
928 #else
929 return(0);
930 #endif
931 case XML_WITH_MODULES:
932 #ifdef LIBXML_MODULES_ENABLED
933 return(1);
934 #else
935 return(0);
936 #endif
937 case XML_WITH_DEBUG:
938 #ifdef LIBXML_DEBUG_ENABLED
939 return(1);
940 #else
941 return(0);
942 #endif
943 case XML_WITH_DEBUG_MEM:
944 #ifdef DEBUG_MEMORY_LOCATION
945 return(1);
946 #else
947 return(0);
948 #endif
949 case XML_WITH_DEBUG_RUN:
950 #ifdef LIBXML_DEBUG_RUNTIME
951 return(1);
952 #else
953 return(0);
954 #endif
955 case XML_WITH_ZLIB:
956 #ifdef LIBXML_ZLIB_ENABLED
957 return(1);
958 #else
959 return(0);
960 #endif
961 case XML_WITH_LZMA:
962 #ifdef LIBXML_LZMA_ENABLED
963 return(1);
964 #else
965 return(0);
966 #endif
967 case XML_WITH_ICU:
968 #ifdef LIBXML_ICU_ENABLED
969 return(1);
970 #else
971 return(0);
972 #endif
973 default:
974 break;
975 }
976 return(0);
977 }
978
979 /************************************************************************
980 * *
981 * SAX2 defaulted attributes handling *
982 * *
983 ************************************************************************/
984
985 /**
986 * xmlDetectSAX2:
987 * @ctxt: an XML parser context
988 *
989 * Do the SAX2 detection and specific intialization
990 */
991 static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt)992 xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993 if (ctxt == NULL) return;
994 #ifdef LIBXML_SAX1_ENABLED
995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996 ((ctxt->sax->startElementNs != NULL) ||
997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
998 #else
999 ctxt->sax2 = 1;
1000 #endif /* LIBXML_SAX1_ENABLED */
1001
1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006 (ctxt->str_xml_ns == NULL)) {
1007 xmlErrMemory(ctxt, NULL);
1008 }
1009 }
1010
1011 typedef struct _xmlDefAttrs xmlDefAttrs;
1012 typedef xmlDefAttrs *xmlDefAttrsPtr;
1013 struct _xmlDefAttrs {
1014 int nbAttrs; /* number of defaulted attributes on that element */
1015 int maxAttrs; /* the size of the array */
1016 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1017 };
1018
/**
 * xmlAttrNormalizeSpace:
 * @src:  the source string
 * @dst:  the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least the size of src, and if
 * one doesn't need to preserve dst (and it doesn't come from a dictionary
 * or read-only memory) then passing src as dst is just fine.
 *
 * Returns NULL if @src or @dst is NULL, or if the write position ends up
 *         equal to the read position (i.e. an in-place call that changed
 *         nothing). Otherwise returns a pointer to the terminating 0
 *         written in @dst, that is the END of the normalized value, not
 *         its start.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    const xmlChar *in = src;
    xmlChar *out = dst;

    if ((in == NULL) || (out == NULL))
        return(NULL);

    /* discard the leading spaces */
    for (; *in == 0x20; in++)
        ;

    while (*in != 0) {
        if (*in != 0x20) {
            *out++ = *in++;
            continue;
        }
        /* collapse a run of spaces, dropping it if it ends the string */
        do {
            in++;
        } while (*in == 0x20);
        if (*in != 0)
            *out++ = 0x20;
    }
    *out = 0;

    return((out == in) ? NULL : out);
}
1057
1058 /**
1059 * xmlAttrNormalizeSpace2:
1060 * @src: the source string
1061 *
1062 * Normalize the space in non CDATA attribute values, a slightly more complex
1063 * front end to avoid allocation problems when running on attribute values
1064 * coming from the input.
1065 *
1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1067 * is needed.
1068 */
1069 static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt,xmlChar * src,int * len)1070 xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1071 {
1072 int i;
1073 int remove_head = 0;
1074 int need_realloc = 0;
1075 const xmlChar *cur;
1076
1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1078 return(NULL);
1079 i = *len;
1080 if (i <= 0)
1081 return(NULL);
1082
1083 cur = src;
1084 while (*cur == 0x20) {
1085 cur++;
1086 remove_head++;
1087 }
1088 while (*cur != 0) {
1089 if (*cur == 0x20) {
1090 cur++;
1091 if ((*cur == 0x20) || (*cur == 0)) {
1092 need_realloc = 1;
1093 break;
1094 }
1095 } else
1096 cur++;
1097 }
1098 if (need_realloc) {
1099 xmlChar *ret;
1100
1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1102 if (ret == NULL) {
1103 xmlErrMemory(ctxt, NULL);
1104 return(NULL);
1105 }
1106 xmlAttrNormalizeSpace(ret, ret);
1107 *len = (int) strlen((const char *)ret);
1108 return(ret);
1109 } else if (remove_head) {
1110 *len -= remove_head;
1111 memmove(src, src + remove_head, 1 + *len);
1112 return(src);
1113 }
1114 return(NULL);
1115 }
1116
1117 /**
1118 * xmlAddDefAttrs:
1119 * @ctxt: an XML parser context
1120 * @fullname: the element fullname
1121 * @fullattr: the attribute fullname
1122 * @value: the attribute value
1123 *
1124 * Add a defaulted attribute for an element
1125 */
1126 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1127 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128 const xmlChar *fullname,
1129 const xmlChar *fullattr,
1130 const xmlChar *value) {
1131 xmlDefAttrsPtr defaults;
1132 int len;
1133 const xmlChar *name;
1134 const xmlChar *prefix;
1135
1136 /*
1137 * Allows to detect attribute redefinitions
1138 */
1139 if (ctxt->attsSpecial != NULL) {
1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1141 return;
1142 }
1143
1144 if (ctxt->attsDefault == NULL) {
1145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1146 if (ctxt->attsDefault == NULL)
1147 goto mem_error;
1148 }
1149
1150 /*
1151 * split the element name into prefix:localname , the string found
1152 * are within the DTD and then not associated to namespace names.
1153 */
1154 name = xmlSplitQName3(fullname, &len);
1155 if (name == NULL) {
1156 name = xmlDictLookup(ctxt->dict, fullname, -1);
1157 prefix = NULL;
1158 } else {
1159 name = xmlDictLookup(ctxt->dict, name, -1);
1160 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1161 }
1162
1163 /*
1164 * make sure there is some storage
1165 */
1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167 if (defaults == NULL) {
1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1169 (4 * 5) * sizeof(const xmlChar *));
1170 if (defaults == NULL)
1171 goto mem_error;
1172 defaults->nbAttrs = 0;
1173 defaults->maxAttrs = 4;
1174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175 defaults, NULL) < 0) {
1176 xmlFree(defaults);
1177 goto mem_error;
1178 }
1179 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1180 xmlDefAttrsPtr temp;
1181
1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1184 if (temp == NULL)
1185 goto mem_error;
1186 defaults = temp;
1187 defaults->maxAttrs *= 2;
1188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189 defaults, NULL) < 0) {
1190 xmlFree(defaults);
1191 goto mem_error;
1192 }
1193 }
1194
1195 /*
1196 * Split the element name into prefix:localname , the string found
1197 * are within the DTD and hen not associated to namespace names.
1198 */
1199 name = xmlSplitQName3(fullattr, &len);
1200 if (name == NULL) {
1201 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1202 prefix = NULL;
1203 } else {
1204 name = xmlDictLookup(ctxt->dict, name, -1);
1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1206 }
1207
1208 defaults->values[5 * defaults->nbAttrs] = name;
1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1210 /* intern the string and precompute the end */
1211 len = xmlStrlen(value);
1212 value = xmlDictLookup(ctxt->dict, value, len);
1213 defaults->values[5 * defaults->nbAttrs + 2] = value;
1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1215 if (ctxt->external)
1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1217 else
1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1219 defaults->nbAttrs++;
1220
1221 return;
1222
1223 mem_error:
1224 xmlErrMemory(ctxt, NULL);
1225 return;
1226 }
1227
1228 /**
1229 * xmlAddSpecialAttr:
1230 * @ctxt: an XML parser context
1231 * @fullname: the element fullname
1232 * @fullattr: the attribute fullname
1233 * @type: the attribute type
1234 *
1235 * Register this attribute type
1236 */
1237 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1238 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239 const xmlChar *fullname,
1240 const xmlChar *fullattr,
1241 int type)
1242 {
1243 if (ctxt->attsSpecial == NULL) {
1244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1245 if (ctxt->attsSpecial == NULL)
1246 goto mem_error;
1247 }
1248
1249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1250 return;
1251
1252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253 (void *) (long) type);
1254 return;
1255
1256 mem_error:
1257 xmlErrMemory(ctxt, NULL);
1258 return;
1259 }
1260
1261 /**
1262 * xmlCleanSpecialAttrCallback:
1263 *
1264 * Removes CDATA attributes from the special attribute table
1265 */
1266 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1267 xmlCleanSpecialAttrCallback(void *payload, void *data,
1268 const xmlChar *fullname, const xmlChar *fullattr,
1269 const xmlChar *unused ATTRIBUTE_UNUSED) {
1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1271
1272 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1274 }
1275 }
1276
1277 /**
1278 * xmlCleanSpecialAttr:
1279 * @ctxt: an XML parser context
1280 *
1281 * Trim the list of attributes defined to remove all those of type
1282 * CDATA as they are not special. This call should be done when finishing
1283 * to parse the DTD and before starting to parse the document root.
1284 */
1285 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1286 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1287 {
1288 if (ctxt->attsSpecial == NULL)
1289 return;
1290
1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1292
1293 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294 xmlHashFree(ctxt->attsSpecial, NULL);
1295 ctxt->attsSpecial = NULL;
1296 }
1297 return;
1298 }
1299
1300 /**
1301 * xmlCheckLanguageID:
1302 * @lang: pointer to the string value
1303 *
1304 * Checks that the value conforms to the LanguageID production:
1305 *
1306 * NOTE: this is somewhat deprecated, those productions were removed from
1307 * the XML Second edition.
1308 *
1309 * [33] LanguageID ::= Langcode ('-' Subcode)*
1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314 * [38] Subcode ::= ([a-z] | [A-Z])+
1315 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1317 *
1318 * http://www.rfc-editor.org/rfc/rfc5646.txt
1319 * langtag = language
1320 * ["-" script]
1321 * ["-" region]
1322 * *("-" variant)
1323 * *("-" extension)
1324 * ["-" privateuse]
1325 * language = 2*3ALPHA ; shortest ISO 639 code
1326 * ["-" extlang] ; sometimes followed by
1327 * ; extended language subtags
1328 * / 4ALPHA ; or reserved for future use
1329 * / 5*8ALPHA ; or registered language subtag
1330 *
1331 * extlang = 3ALPHA ; selected ISO 639 codes
1332 * *2("-" 3ALPHA) ; permanently reserved
1333 *
1334 * script = 4ALPHA ; ISO 15924 code
1335 *
1336 * region = 2ALPHA ; ISO 3166-1 code
1337 * / 3DIGIT ; UN M.49 code
1338 *
1339 * variant = 5*8alphanum ; registered variants
1340 * / (DIGIT 3alphanum)
1341 *
1342 * extension = singleton 1*("-" (2*8alphanum))
1343 *
1344 * ; Single alphanumerics
1345 * ; "x" reserved for private use
1346 * singleton = DIGIT ; 0 - 9
1347 * / %x41-57 ; A - W
1348 * / %x59-5A ; Y - Z
1349 * / %x61-77 ; a - w
1350 * / %x79-7A ; y - z
1351 *
1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353 * The parser below doesn't try to cope with extension or privateuse
1354 * that could be added but that's not interoperable anyway
1355 *
1356 * Returns 1 if correct 0 otherwise
1357 **/
1358 int
xmlCheckLanguageID(const xmlChar * lang)1359 xmlCheckLanguageID(const xmlChar * lang)
1360 {
1361 const xmlChar *cur = lang, *nxt;
1362
1363 if (cur == NULL)
1364 return (0);
1365 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1366 ((cur[0] == 'I') && (cur[1] == '-')) ||
1367 ((cur[0] == 'x') && (cur[1] == '-')) ||
1368 ((cur[0] == 'X') && (cur[1] == '-'))) {
1369 /*
1370 * Still allow IANA code and user code which were coming
1371 * from the previous version of the XML-1.0 specification
1372 * it's deprecated but we should not fail
1373 */
1374 cur += 2;
1375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1376 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1377 cur++;
1378 return(cur[0] == 0);
1379 }
1380 nxt = cur;
1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1383 nxt++;
1384 if (nxt - cur >= 4) {
1385 /*
1386 * Reserved
1387 */
1388 if ((nxt - cur > 8) || (nxt[0] != 0))
1389 return(0);
1390 return(1);
1391 }
1392 if (nxt - cur < 2)
1393 return(0);
1394 /* we got an ISO 639 code */
1395 if (nxt[0] == 0)
1396 return(1);
1397 if (nxt[0] != '-')
1398 return(0);
1399
1400 nxt++;
1401 cur = nxt;
1402 /* now we can have extlang or script or region or variant */
1403 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1404 goto region_m49;
1405
1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1408 nxt++;
1409 if (nxt - cur == 4)
1410 goto script;
1411 if (nxt - cur == 2)
1412 goto region;
1413 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1414 goto variant;
1415 if (nxt - cur != 3)
1416 return(0);
1417 /* we parsed an extlang */
1418 if (nxt[0] == 0)
1419 return(1);
1420 if (nxt[0] != '-')
1421 return(0);
1422
1423 nxt++;
1424 cur = nxt;
1425 /* now we can have script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427 goto region_m49;
1428
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431 nxt++;
1432 if (nxt - cur == 2)
1433 goto region;
1434 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1435 goto variant;
1436 if (nxt - cur != 4)
1437 return(0);
1438 /* we parsed a script */
1439 script:
1440 if (nxt[0] == 0)
1441 return(1);
1442 if (nxt[0] != '-')
1443 return(0);
1444
1445 nxt++;
1446 cur = nxt;
1447 /* now we can have region or variant */
1448 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1449 goto region_m49;
1450
1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1453 nxt++;
1454
1455 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456 goto variant;
1457 if (nxt - cur != 2)
1458 return(0);
1459 /* we parsed a region */
1460 region:
1461 if (nxt[0] == 0)
1462 return(1);
1463 if (nxt[0] != '-')
1464 return(0);
1465
1466 nxt++;
1467 cur = nxt;
1468 /* now we can just have a variant */
1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471 nxt++;
1472
1473 if ((nxt - cur < 5) || (nxt - cur > 8))
1474 return(0);
1475
1476 /* we parsed a variant */
1477 variant:
1478 if (nxt[0] == 0)
1479 return(1);
1480 if (nxt[0] != '-')
1481 return(0);
1482 /* extensions and private use subtags not checked */
1483 return (1);
1484
1485 region_m49:
1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1488 nxt += 3;
1489 goto region;
1490 }
1491 return(0);
1492 }
1493
1494 /************************************************************************
1495 * *
1496 * Parser stacks related functions and macros *
1497 * *
1498 ************************************************************************/
1499
1500 static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501 const xmlChar ** str);
1502
1503 #ifdef SAX2
1504 /**
1505 * nsPush:
1506 * @ctxt: an XML parser context
1507 * @prefix: the namespace prefix or NULL
1508 * @URL: the namespace name
1509 *
1510 * Pushes a new parser namespace on top of the ns stack
1511 *
1512 * Returns -1 in case of error, -2 if the namespace should be discarded
1513 * and the index in the stack otherwise.
1514 */
1515 static int
nsPush(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URL)1516 nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1517 {
1518 if (ctxt->options & XML_PARSE_NSCLEAN) {
1519 int i;
1520 for (i = 0;i < ctxt->nsNr;i += 2) {
1521 if (ctxt->nsTab[i] == prefix) {
1522 /* in scope */
1523 if (ctxt->nsTab[i + 1] == URL)
1524 return(-2);
1525 /* out of scope keep it */
1526 break;
1527 }
1528 }
1529 }
1530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1531 ctxt->nsMax = 10;
1532 ctxt->nsNr = 0;
1533 ctxt->nsTab = (const xmlChar **)
1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535 if (ctxt->nsTab == NULL) {
1536 xmlErrMemory(ctxt, NULL);
1537 ctxt->nsMax = 0;
1538 return (-1);
1539 }
1540 } else if (ctxt->nsNr >= ctxt->nsMax) {
1541 const xmlChar ** tmp;
1542 ctxt->nsMax *= 2;
1543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1545 if (tmp == NULL) {
1546 xmlErrMemory(ctxt, NULL);
1547 ctxt->nsMax /= 2;
1548 return (-1);
1549 }
1550 ctxt->nsTab = tmp;
1551 }
1552 ctxt->nsTab[ctxt->nsNr++] = prefix;
1553 ctxt->nsTab[ctxt->nsNr++] = URL;
1554 return (ctxt->nsNr);
1555 }
1556 /**
1557 * nsPop:
1558 * @ctxt: an XML parser context
1559 * @nr: the number to pop
1560 *
1561 * Pops the top @nr parser prefix/namespace from the ns stack
1562 *
1563 * Returns the number of namespaces removed
1564 */
1565 static int
nsPop(xmlParserCtxtPtr ctxt,int nr)1566 nsPop(xmlParserCtxtPtr ctxt, int nr)
1567 {
1568 int i;
1569
1570 if (ctxt->nsTab == NULL) return(0);
1571 if (ctxt->nsNr < nr) {
1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1573 nr = ctxt->nsNr;
1574 }
1575 if (ctxt->nsNr <= 0)
1576 return (0);
1577
1578 for (i = 0;i < nr;i++) {
1579 ctxt->nsNr--;
1580 ctxt->nsTab[ctxt->nsNr] = NULL;
1581 }
1582 return(nr);
1583 }
1584 #endif
1585
1586 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1587 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588 const xmlChar **atts;
1589 int *attallocs;
1590 int maxatts;
1591
1592 if (ctxt->atts == NULL) {
1593 maxatts = 55; /* allow for 10 attrs by default */
1594 atts = (const xmlChar **)
1595 xmlMalloc(maxatts * sizeof(xmlChar *));
1596 if (atts == NULL) goto mem_error;
1597 ctxt->atts = atts;
1598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599 if (attallocs == NULL) goto mem_error;
1600 ctxt->attallocs = attallocs;
1601 ctxt->maxatts = maxatts;
1602 } else if (nr + 5 > ctxt->maxatts) {
1603 maxatts = (nr + 5) * 2;
1604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605 maxatts * sizeof(const xmlChar *));
1606 if (atts == NULL) goto mem_error;
1607 ctxt->atts = atts;
1608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609 (maxatts / 5) * sizeof(int));
1610 if (attallocs == NULL) goto mem_error;
1611 ctxt->attallocs = attallocs;
1612 ctxt->maxatts = maxatts;
1613 }
1614 return(ctxt->maxatts);
1615 mem_error:
1616 xmlErrMemory(ctxt, NULL);
1617 return(-1);
1618 }
1619
1620 /**
1621 * inputPush:
1622 * @ctxt: an XML parser context
1623 * @value: the parser input
1624 *
1625 * Pushes a new parser input on top of the input stack
1626 *
1627 * Returns -1 in case of error, the index in the stack otherwise
1628 */
1629 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1630 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1631 {
1632 if ((ctxt == NULL) || (value == NULL))
1633 return(-1);
1634 if (ctxt->inputNr >= ctxt->inputMax) {
1635 ctxt->inputMax *= 2;
1636 ctxt->inputTab =
1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1638 ctxt->inputMax *
1639 sizeof(ctxt->inputTab[0]));
1640 if (ctxt->inputTab == NULL) {
1641 xmlErrMemory(ctxt, NULL);
1642 xmlFreeInputStream(value);
1643 ctxt->inputMax /= 2;
1644 value = NULL;
1645 return (-1);
1646 }
1647 }
1648 ctxt->inputTab[ctxt->inputNr] = value;
1649 ctxt->input = value;
1650 return (ctxt->inputNr++);
1651 }
1652 /**
1653 * inputPop:
1654 * @ctxt: an XML parser context
1655 *
1656 * Pops the top parser input from the input stack
1657 *
1658 * Returns the input just removed
1659 */
1660 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1661 inputPop(xmlParserCtxtPtr ctxt)
1662 {
1663 xmlParserInputPtr ret;
1664
1665 if (ctxt == NULL)
1666 return(NULL);
1667 if (ctxt->inputNr <= 0)
1668 return (NULL);
1669 ctxt->inputNr--;
1670 if (ctxt->inputNr > 0)
1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1672 else
1673 ctxt->input = NULL;
1674 ret = ctxt->inputTab[ctxt->inputNr];
1675 ctxt->inputTab[ctxt->inputNr] = NULL;
1676 return (ret);
1677 }
1678 /**
1679 * nodePush:
1680 * @ctxt: an XML parser context
1681 * @value: the element node
1682 *
1683 * Pushes a new element node on top of the node stack
1684 *
1685 * Returns -1 in case of error, the index in the stack otherwise
1686 */
1687 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)1688 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1689 {
1690 if (ctxt == NULL) return(0);
1691 if (ctxt->nodeNr >= ctxt->nodeMax) {
1692 xmlNodePtr *tmp;
1693
1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1695 ctxt->nodeMax * 2 *
1696 sizeof(ctxt->nodeTab[0]));
1697 if (tmp == NULL) {
1698 xmlErrMemory(ctxt, NULL);
1699 return (-1);
1700 }
1701 ctxt->nodeTab = tmp;
1702 ctxt->nodeMax *= 2;
1703 }
1704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1708 xmlParserMaxDepth);
1709 ctxt->instate = XML_PARSER_EOF;
1710 return(-1);
1711 }
1712 ctxt->nodeTab[ctxt->nodeNr] = value;
1713 ctxt->node = value;
1714 return (ctxt->nodeNr++);
1715 }
1716
1717 /**
1718 * nodePop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top element node from the node stack
1722 *
1723 * Returns the node just removed
1724 */
1725 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)1726 nodePop(xmlParserCtxtPtr ctxt)
1727 {
1728 xmlNodePtr ret;
1729
1730 if (ctxt == NULL) return(NULL);
1731 if (ctxt->nodeNr <= 0)
1732 return (NULL);
1733 ctxt->nodeNr--;
1734 if (ctxt->nodeNr > 0)
1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1736 else
1737 ctxt->node = NULL;
1738 ret = ctxt->nodeTab[ctxt->nodeNr];
1739 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1740 return (ret);
1741 }
1742
1743 #ifdef LIBXML_PUSH_ENABLED
1744 /**
1745 * nameNsPush:
1746 * @ctxt: an XML parser context
1747 * @value: the element name
1748 * @prefix: the element prefix
1749 * @URI: the element namespace name
1750 *
1751 * Pushes a new element name/prefix/URL on top of the name stack
1752 *
1753 * Returns -1 in case of error, the index in the stack otherwise
1754 */
1755 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int nsNr)1756 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1758 {
1759 if (ctxt->nameNr >= ctxt->nameMax) {
1760 const xmlChar * *tmp;
1761 void **tmp2;
1762 ctxt->nameMax *= 2;
1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1764 ctxt->nameMax *
1765 sizeof(ctxt->nameTab[0]));
1766 if (tmp == NULL) {
1767 ctxt->nameMax /= 2;
1768 goto mem_error;
1769 }
1770 ctxt->nameTab = tmp;
1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1772 ctxt->nameMax * 3 *
1773 sizeof(ctxt->pushTab[0]));
1774 if (tmp2 == NULL) {
1775 ctxt->nameMax /= 2;
1776 goto mem_error;
1777 }
1778 ctxt->pushTab = tmp2;
1779 }
1780 ctxt->nameTab[ctxt->nameNr] = value;
1781 ctxt->name = value;
1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1785 return (ctxt->nameNr++);
1786 mem_error:
1787 xmlErrMemory(ctxt, NULL);
1788 return (-1);
1789 }
1790 /**
1791 * nameNsPop:
1792 * @ctxt: an XML parser context
1793 *
1794 * Pops the top element/prefix/URI name from the name stack
1795 *
1796 * Returns the name just removed
1797 */
1798 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)1799 nameNsPop(xmlParserCtxtPtr ctxt)
1800 {
1801 const xmlChar *ret;
1802
1803 if (ctxt->nameNr <= 0)
1804 return (NULL);
1805 ctxt->nameNr--;
1806 if (ctxt->nameNr > 0)
1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1808 else
1809 ctxt->name = NULL;
1810 ret = ctxt->nameTab[ctxt->nameNr];
1811 ctxt->nameTab[ctxt->nameNr] = NULL;
1812 return (ret);
1813 }
1814 #endif /* LIBXML_PUSH_ENABLED */
1815
1816 /**
1817 * namePush:
1818 * @ctxt: an XML parser context
1819 * @value: the element name
1820 *
1821 * Pushes a new element name on top of the name stack
1822 *
1823 * Returns -1 in case of error, the index in the stack otherwise
1824 */
1825 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)1826 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1827 {
1828 if (ctxt == NULL) return (-1);
1829
1830 if (ctxt->nameNr >= ctxt->nameMax) {
1831 const xmlChar * *tmp;
1832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1833 ctxt->nameMax * 2 *
1834 sizeof(ctxt->nameTab[0]));
1835 if (tmp == NULL) {
1836 goto mem_error;
1837 }
1838 ctxt->nameTab = tmp;
1839 ctxt->nameMax *= 2;
1840 }
1841 ctxt->nameTab[ctxt->nameNr] = value;
1842 ctxt->name = value;
1843 return (ctxt->nameNr++);
1844 mem_error:
1845 xmlErrMemory(ctxt, NULL);
1846 return (-1);
1847 }
1848 /**
1849 * namePop:
1850 * @ctxt: an XML parser context
1851 *
1852 * Pops the top element name from the name stack
1853 *
1854 * Returns the name just removed
1855 */
1856 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)1857 namePop(xmlParserCtxtPtr ctxt)
1858 {
1859 const xmlChar *ret;
1860
1861 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1862 return (NULL);
1863 ctxt->nameNr--;
1864 if (ctxt->nameNr > 0)
1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1866 else
1867 ctxt->name = NULL;
1868 ret = ctxt->nameTab[ctxt->nameNr];
1869 ctxt->nameTab[ctxt->nameNr] = NULL;
1870 return (ret);
1871 }
1872
/*
 * spacePush:
 * Pushes val on top of the ctxt->spaceTab stack, doubling the table when
 * it is full, and points ctxt->space at the new top entry.
 * Returns the index of the new entry or -1 in case of memory error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int doubled = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = doubled;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1891
/*
 * spacePop:
 * Pops the top value of the ctxt->spaceTab stack. ctxt->space is moved to
 * the entry below it, or to the first slot of the table once the stack is
 * empty, and the slot released is overwritten with -1.
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1904
1905 /*
1906 * Macros for accessing the content. Those should be used only by the parser,
1907 * and not exported.
1908 *
1909 * Dirty macros, i.e. one often need to make assumption on the context to
1910 * use them
1911 *
1912 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1913 * To be used with extreme caution since operations consuming
1914 * characters may move the input buffer to a different location !
1915 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1916 * This should be used internally by the parser
1917 * only to compare to ASCII values otherwise it would break when
1918 * running with UTF-8 encoding.
1919 * RAW same as CUR but in the input buffer, bypass any token
1920 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1924 * strings without newlines within the parser.
1925 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1926 * defined char within the parser.
1927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1928 *
1929 * NEXT Skip to the next character, this does the proper decoding
1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1931 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1932 * CUR_CHAR(l) returns the current unicode character (int), set l
1933 * to the number of xmlChars used for the encoding [0-5].
1934 * CUR_SCHAR same but operate on a string instead of the context
1935 * COPY_BUF copy the current unicode char to the target buffer, increment
1936 * the index
1937 * GROW, SHRINK handling of input buffers
1938 */
1939
/*
 * Accessors on the current input. All of them expand to expressions on a
 * variable named ctxt, which must be in scope where they are used.
 */
/* the xmlChar under the input cursor */
#define RAW (*ctxt->input->cur)
/* same expansion as RAW */
#define CUR (*ctxt->input->cur)
/* the xmlChar val positions after the cursor, no bound check is done */
#define NXT(val) ctxt->input->cur[(val)]
/* the input cursor itself */
#define CUR_PTR ctxt->input->cur
1944
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order.
 * The comparison stops at the first mismatch. Every argument is
 * parenthesized in the expansion so that a compound expression such as
 * p + 1 is converted as a whole; s is expanded once per byte compared and
 * must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1962
/*
 * SKIP(val): moves the input cursor val xmlChars forward, adding val to
 * ctxt->nbChars and to the column counter; the line counter is not
 * touched. Then, if the cursor is on a '%', xmlParserHandlePEReference()
 * is called, and if it is on a 0 which xmlParserInputGrow() cannot get
 * past, xmlPopInput() is called.
 * val is expanded three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one at a time so
 * that a '\n' increments the line counter and resets the column to 1.
 * val is used unparenthesized as the loop bound and is evaluated on each
 * iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1985
/*
 * SHRINK: calls xmlSHRINK() when the parser is not in progressive mode,
 * more than 2 * INPUT_CHUNK xmlChars lie between the buffer base and the
 * cursor, and fewer than 2 * INPUT_CHUNK remain before the buffer end.
 * The expansion is a bare if statement which already ends with a ';'.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
1990
/*
 * xmlSHRINK:
 * Shrinks the current input buffer with xmlParserInputShrink(). If the
 * cursor then sits on a 0 and xmlParserInputGrow() cannot provide more
 * data, the input is popped with xmlPopInput().
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);

    if (*ctxt->input->cur != 0)
        return;
    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) > 0)
        return;
    xmlPopInput(ctxt);
}
1997
/*
 * GROW: calls xmlGROW() when the parser is not in progressive mode and
 * fewer than INPUT_CHUNK xmlChars remain between the cursor and the end
 * of the input buffer.
 * The expansion is a bare if statement which already ends with a ';'.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2001
/*
 * xmlGROW:
 * Asks xmlParserInputGrow() for more data on the current input. If the
 * cursor is set and still sits on a 0, a second attempt is made and the
 * input is popped with xmlPopInput() when that one brings nothing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if (ctxt->input->cur == NULL)
        return;
    if (*ctxt->input->cur != 0)
        return;
    if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) > 0)
        return;
    xmlPopInput(ctxt);
}
2008
/* SKIP_BLANKS: skip the blanks at the cursor, yields the number skipped */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: move the cursor one xmlChar forward, updating the column and
 * ctxt->nbChars but not the line counter. When the cursor lands on a 0
 * more input is requested; the result of that request is ignored.
 * The expansion is a plain brace block, not a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the character at the cursor, which is l xmlChars long.
 * The line/column counters advance by one character ('\n' starts a new
 * line) while the cursor advances by l; ctxt->nbChars is not updated.
 * A '%' at the new position triggers xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character from xmlCurrentChar(), l receives its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same on the string s, through xmlStringCurrentChar() */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. With l == 1 v is stored as a single xmlChar, otherwise it is
 * written by xmlCopyCharMultiByte() and i grows by what that call returns.
 * The expansion is an unbraced if/else without trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
2035
2036 /**
2037 * xmlSkipBlankChars:
2038 * @ctxt: the XML parser context
2039 *
2040 * skip all blanks character found at that point in the input streams.
2041 * It pops up finished entities in the process if allowable at that point.
2042 *
2043 * Returns the number of space chars skipped
2044 */
2045
2046 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2047 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2048 int res = 0;
2049
2050 /*
2051 * It's Okay to use CUR/NEXT here since all the blanks are on
2052 * the ASCII range.
2053 */
2054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2055 const xmlChar *cur;
2056 /*
2057 * if we are in the document content, go really fast
2058 */
2059 cur = ctxt->input->cur;
2060 while (IS_BLANK_CH(*cur)) {
2061 if (*cur == '\n') {
2062 ctxt->input->line++; ctxt->input->col = 1;
2063 }
2064 cur++;
2065 res++;
2066 if (*cur == 0) {
2067 ctxt->input->cur = cur;
2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069 cur = ctxt->input->cur;
2070 }
2071 }
2072 ctxt->input->cur = cur;
2073 } else {
2074 int cur;
2075 do {
2076 cur = CUR;
2077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
2078 NEXT;
2079 cur = CUR;
2080 res++;
2081 }
2082 while ((cur == 0) && (ctxt->inputNr > 1) &&
2083 (ctxt->instate != XML_PARSER_COMMENT)) {
2084 xmlPopInput(ctxt);
2085 cur = CUR;
2086 }
2087 /*
2088 * Need to handle support of entities branching here
2089 */
2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2092 }
2093 return(res);
2094 }
2095
2096 /************************************************************************
2097 * *
2098 * Commodity functions to handle entities *
2099 * *
2100 ************************************************************************/
2101
2102 /**
2103 * xmlPopInput:
2104 * @ctxt: an XML parser context
2105 *
2106 * xmlPopInput: the current input pointed by ctxt->input came to an end
2107 * pop it and return the next char.
2108 *
2109 * Returns the current xmlChar in the parser context
2110 */
2111 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2112 xmlPopInput(xmlParserCtxtPtr ctxt) {
2113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "Popping input %d\n", ctxt->inputNr);
2117 xmlFreeInputStream(inputPop(ctxt));
2118 if ((*ctxt->input->cur == 0) &&
2119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120 return(xmlPopInput(ctxt));
2121 return(CUR);
2122 }
2123
2124 /**
2125 * xmlPushInput:
2126 * @ctxt: an XML parser context
2127 * @input: an XML parser input fragment (entity, XML fragment ...).
2128 *
2129 * xmlPushInput: switch to a new input stream which is stacked on top
2130 * of the previous one(s).
2131 * Returns -1 in case of error or the index in the input stack
2132 */
2133 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2134 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2135 int ret;
2136 if (input == NULL) return(-1);
2137
2138 if (xmlParserDebugEntities) {
2139 if ((ctxt->input != NULL) && (ctxt->input->filename))
2140 xmlGenericError(xmlGenericErrorContext,
2141 "%s(%d): ", ctxt->input->filename,
2142 ctxt->input->line);
2143 xmlGenericError(xmlGenericErrorContext,
2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2145 }
2146 ret = inputPush(ctxt, input);
2147 GROW;
2148 return(ret);
2149 }
2150
2151 /**
2152 * xmlParseCharRef:
2153 * @ctxt: an XML parser context
2154 *
2155 * parse Reference declarations
2156 *
2157 * [66] CharRef ::= '&#' [0-9]+ ';' |
2158 * '&#x' [0-9a-fA-F]+ ';'
2159 *
2160 * [ WFC: Legal Character ]
2161 * Characters referred to using character references must match the
2162 * production for Char.
2163 *
2164 * Returns the value parsed (as an int), 0 in case of error
2165 */
2166 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2167 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2168 unsigned int val = 0;
2169 int count = 0;
2170 unsigned int outofrange = 0;
2171
2172 /*
2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2174 */
2175 if ((RAW == '&') && (NXT(1) == '#') &&
2176 (NXT(2) == 'x')) {
2177 SKIP(3);
2178 GROW;
2179 while (RAW != ';') { /* loop blocked by count */
2180 if (count++ > 20) {
2181 count = 0;
2182 GROW;
2183 }
2184 if ((RAW >= '0') && (RAW <= '9'))
2185 val = val * 16 + (CUR - '0');
2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187 val = val * 16 + (CUR - 'a') + 10;
2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189 val = val * 16 + (CUR - 'A') + 10;
2190 else {
2191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2192 val = 0;
2193 break;
2194 }
2195 if (val > 0x10FFFF)
2196 outofrange = val;
2197
2198 NEXT;
2199 count++;
2200 }
2201 if (RAW == ';') {
2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2203 ctxt->input->col++;
2204 ctxt->nbChars ++;
2205 ctxt->input->cur++;
2206 }
2207 } else if ((RAW == '&') && (NXT(1) == '#')) {
2208 SKIP(2);
2209 GROW;
2210 while (RAW != ';') { /* loop blocked by count */
2211 if (count++ > 20) {
2212 count = 0;
2213 GROW;
2214 }
2215 if ((RAW >= '0') && (RAW <= '9'))
2216 val = val * 10 + (CUR - '0');
2217 else {
2218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2219 val = 0;
2220 break;
2221 }
2222 if (val > 0x10FFFF)
2223 outofrange = val;
2224
2225 NEXT;
2226 count++;
2227 }
2228 if (RAW == ';') {
2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2230 ctxt->input->col++;
2231 ctxt->nbChars ++;
2232 ctxt->input->cur++;
2233 }
2234 } else {
2235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2236 }
2237
2238 /*
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 */
2243 if ((IS_CHAR(val) && (outofrange == 0))) {
2244 return(val);
2245 } else {
2246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247 "xmlParseCharRef: invalid xmlChar value %d\n",
2248 val);
2249 }
2250 return(0);
2251 }
2252
2253 /**
2254 * xmlParseStringCharRef:
2255 * @ctxt: an XML parser context
2256 * @str: a pointer to an index in the string
2257 *
2258 * parse Reference declarations, variant parsing from a string rather
2259 * than an an input flow.
2260 *
2261 * [66] CharRef ::= '&#' [0-9]+ ';' |
2262 * '&#x' [0-9a-fA-F]+ ';'
2263 *
2264 * [ WFC: Legal Character ]
2265 * Characters referred to using character references must match the
2266 * production for Char.
2267 *
2268 * Returns the value parsed (as an int), 0 in case of error, str will be
2269 * updated to the current value of the index
2270 */
2271 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2272 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2273 const xmlChar *ptr;
2274 xmlChar cur;
2275 unsigned int val = 0;
2276 unsigned int outofrange = 0;
2277
2278 if ((str == NULL) || (*str == NULL)) return(0);
2279 ptr = *str;
2280 cur = *ptr;
2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2282 ptr += 3;
2283 cur = *ptr;
2284 while (cur != ';') { /* Non input consuming loop */
2285 if ((cur >= '0') && (cur <= '9'))
2286 val = val * 16 + (cur - '0');
2287 else if ((cur >= 'a') && (cur <= 'f'))
2288 val = val * 16 + (cur - 'a') + 10;
2289 else if ((cur >= 'A') && (cur <= 'F'))
2290 val = val * 16 + (cur - 'A') + 10;
2291 else {
2292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2293 val = 0;
2294 break;
2295 }
2296 if (val > 0x10FFFF)
2297 outofrange = val;
2298
2299 ptr++;
2300 cur = *ptr;
2301 }
2302 if (cur == ';')
2303 ptr++;
2304 } else if ((cur == '&') && (ptr[1] == '#')){
2305 ptr += 2;
2306 cur = *ptr;
2307 while (cur != ';') { /* Non input consuming loops */
2308 if ((cur >= '0') && (cur <= '9'))
2309 val = val * 10 + (cur - '0');
2310 else {
2311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2312 val = 0;
2313 break;
2314 }
2315 if (val > 0x10FFFF)
2316 outofrange = val;
2317
2318 ptr++;
2319 cur = *ptr;
2320 }
2321 if (cur == ';')
2322 ptr++;
2323 } else {
2324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2325 return(0);
2326 }
2327 *str = ptr;
2328
2329 /*
2330 * [ WFC: Legal Character ]
2331 * Characters referred to using character references must match the
2332 * production for Char.
2333 */
2334 if ((IS_CHAR(val) && (outofrange == 0))) {
2335 return(val);
2336 } else {
2337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2339 val);
2340 }
2341 return(0);
2342 }
2343
2344 /**
2345 * xmlNewBlanksWrapperInputStream:
2346 * @ctxt: an XML parser context
2347 * @entity: an Entity pointer
2348 *
2349 * Create a new input stream for wrapping
2350 * blanks around a PEReference
2351 *
2352 * Returns the new input stream or NULL
2353 */
2354
deallocblankswrapper(xmlChar * str)2355 static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2356
2357 static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)2358 xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359 xmlParserInputPtr input;
2360 xmlChar *buffer;
2361 size_t length;
2362 if (entity == NULL) {
2363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364 "xmlNewBlanksWrapperInputStream entity\n");
2365 return(NULL);
2366 }
2367 if (xmlParserDebugEntities)
2368 xmlGenericError(xmlGenericErrorContext,
2369 "new blanks wrapper for entity: %s\n", entity->name);
2370 input = xmlNewInputStream(ctxt);
2371 if (input == NULL) {
2372 return(NULL);
2373 }
2374 length = xmlStrlen(entity->name) + 5;
2375 buffer = xmlMallocAtomic(length);
2376 if (buffer == NULL) {
2377 xmlErrMemory(ctxt, NULL);
2378 xmlFree(input);
2379 return(NULL);
2380 }
2381 buffer [0] = ' ';
2382 buffer [1] = '%';
2383 buffer [length-3] = ';';
2384 buffer [length-2] = ' ';
2385 buffer [length-1] = 0;
2386 memcpy(buffer + 2, entity->name, length - 5);
2387 input->free = deallocblankswrapper;
2388 input->base = buffer;
2389 input->cur = buffer;
2390 input->length = length;
2391 input->end = &buffer[length];
2392 return(input);
2393 }
2394
2395 /**
2396 * xmlParserHandlePEReference:
2397 * @ctxt: the parser context
2398 *
2399 * [69] PEReference ::= '%' Name ';'
2400 *
2401 * [ WFC: No Recursion ]
2402 * A parsed entity must not contain a recursive
2403 * reference to itself, either directly or indirectly.
2404 *
2405 * [ WFC: Entity Declared ]
2406 * In a document without any DTD, a document with only an internal DTD
2407 * subset which contains no parameter entity references, or a document
2408 * with "standalone='yes'", ... ... The declaration of a parameter
2409 * entity must precede any reference to it...
2410 *
2411 * [ VC: Entity Declared ]
2412 * In a document with an external subset or external parameter entities
2413 * with "standalone='no'", ... ... The declaration of a parameter entity
2414 * must precede any reference to it...
2415 *
2416 * [ WFC: In DTD ]
2417 * Parameter-entity references may only appear in the DTD.
2418 * NOTE: misleading but this is handled.
2419 *
2420 * A PEReference may have been detected in the current input stream
2421 * the handling is done accordingly to
2422 * http://www.w3.org/TR/REC-xml#entproc
2423 * i.e.
2424 * - Included in literal in entity values
2425 * - Included as Parameter Entity reference within DTDs
2426 */
2427 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2428 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2429 const xmlChar *name;
2430 xmlEntityPtr entity = NULL;
2431 xmlParserInputPtr input;
2432
2433 if (RAW != '%') return;
2434 switch(ctxt->instate) {
2435 case XML_PARSER_CDATA_SECTION:
2436 return;
2437 case XML_PARSER_COMMENT:
2438 return;
2439 case XML_PARSER_START_TAG:
2440 return;
2441 case XML_PARSER_END_TAG:
2442 return;
2443 case XML_PARSER_EOF:
2444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2445 return;
2446 case XML_PARSER_PROLOG:
2447 case XML_PARSER_START:
2448 case XML_PARSER_MISC:
2449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2450 return;
2451 case XML_PARSER_ENTITY_DECL:
2452 case XML_PARSER_CONTENT:
2453 case XML_PARSER_ATTRIBUTE_VALUE:
2454 case XML_PARSER_PI:
2455 case XML_PARSER_SYSTEM_LITERAL:
2456 case XML_PARSER_PUBLIC_LITERAL:
2457 /* we just ignore it there */
2458 return;
2459 case XML_PARSER_EPILOG:
2460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2461 return;
2462 case XML_PARSER_ENTITY_VALUE:
2463 /*
2464 * NOTE: in the case of entity values, we don't do the
2465 * substitution here since we need the literal
2466 * entity value to be able to save the internal
2467 * subset of the document.
2468 * This will be handled by xmlStringDecodeEntities
2469 */
2470 return;
2471 case XML_PARSER_DTD:
2472 /*
2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474 * In the internal DTD subset, parameter-entity references
2475 * can occur only where markup declarations can occur, not
2476 * within markup declarations.
2477 * In that case this is handled in xmlParseMarkupDecl
2478 */
2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2480 return;
2481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2482 return;
2483 break;
2484 case XML_PARSER_IGNORE:
2485 return;
2486 }
2487
2488 NEXT;
2489 name = xmlParseName(ctxt);
2490 if (xmlParserDebugEntities)
2491 xmlGenericError(xmlGenericErrorContext,
2492 "PEReference: %s\n", name);
2493 if (name == NULL) {
2494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2495 } else {
2496 if (RAW == ';') {
2497 NEXT;
2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500 if (entity == NULL) {
2501
2502 /*
2503 * [ WFC: Entity Declared ]
2504 * In a document without any DTD, a document with only an
2505 * internal DTD subset which contains no parameter entity
2506 * references, or a document with "standalone='yes'", ...
2507 * ... The declaration of a parameter entity must precede
2508 * any reference to it...
2509 */
2510 if ((ctxt->standalone == 1) ||
2511 ((ctxt->hasExternalSubset == 0) &&
2512 (ctxt->hasPErefs == 0))) {
2513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2514 "PEReference: %%%s; not found\n", name);
2515 } else {
2516 /*
2517 * [ VC: Entity Declared ]
2518 * In a document with an external subset or external
2519 * parameter entities with "standalone='no'", ...
2520 * ... The declaration of a parameter entity must precede
2521 * any reference to it...
2522 */
2523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525 "PEReference: %%%s; not found\n",
2526 name, NULL);
2527 } else
2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
2530 name, NULL);
2531 ctxt->valid = 0;
2532 }
2533 } else if (ctxt->input->free != deallocblankswrapper) {
2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2535 if (xmlPushInput(ctxt, input) < 0)
2536 return;
2537 } else {
2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2540 xmlChar start[4];
2541 xmlCharEncoding enc;
2542
2543 /*
2544 * handle the extra spaces added before and after
2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2546 * this is done independently.
2547 */
2548 input = xmlNewEntityInputStream(ctxt, entity);
2549 if (xmlPushInput(ctxt, input) < 0)
2550 return;
2551
2552 /*
2553 * Get the 4 first bytes and decode the charset
2554 * if enc != XML_CHAR_ENCODING_NONE
2555 * plug some encoding conversion routines.
2556 * Note that, since we may have some non-UTF8
2557 * encoding (like UTF16, bug 135229), the 'length'
2558 * is not known, but we can calculate based upon
2559 * the amount of data in the buffer.
2560 */
2561 GROW
2562 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2563 start[0] = RAW;
2564 start[1] = NXT(1);
2565 start[2] = NXT(2);
2566 start[3] = NXT(3);
2567 enc = xmlDetectCharEncoding(start, 4);
2568 if (enc != XML_CHAR_ENCODING_NONE) {
2569 xmlSwitchEncoding(ctxt, enc);
2570 }
2571 }
2572
2573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575 (IS_BLANK_CH(NXT(5)))) {
2576 xmlParseTextDecl(ctxt);
2577 }
2578 } else {
2579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580 "PEReference: %s is not a parameter entity\n",
2581 name);
2582 }
2583 }
2584 } else {
2585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2586 }
2587 }
2588 }
2589
/*
 * Macro used to grow the current buffer.
 *
 * @buffer names an xmlChar pointer; the companion variable
 * <buffer>_size holds its size and is updated to 2 * old size + @n
 * before the reallocation is attempted. The enclosing function must
 * provide a "mem_error" label, which is jumped to when the reallocation
 * fails (@buffer itself is then left pointing to the old block).
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size = buffer##_size * 2 + (n);				\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2602
2603 /**
2604 * xmlStringLenDecodeEntities:
2605 * @ctxt: the parser context
2606 * @str: the input string
2607 * @len: the string length
2608 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2609 * @end: an end marker xmlChar, 0 if none
2610 * @end2: an end marker xmlChar, 0 if none
2611 * @end3: an end marker xmlChar, 0 if none
2612 *
2613 * Takes a entity string content and process to do the adequate substitutions.
2614 *
2615 * [67] Reference ::= EntityRef | CharRef
2616 *
2617 * [69] PEReference ::= '%' Name ';'
2618 *
2619 * Returns A newly allocated string with the substitution done. The caller
2620 * must deallocate it !
2621 */
2622 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what,xmlChar end,xmlChar end2,xmlChar end3)2623 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2624 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2625 xmlChar *buffer = NULL;
2626 int buffer_size = 0;
2627
2628 xmlChar *current = NULL;
2629 xmlChar *rep = NULL;
2630 const xmlChar *last;
2631 xmlEntityPtr ent;
2632 int c,l;
2633 int nbchars = 0;
2634
2635 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2636 return(NULL);
2637 last = str + len;
2638
2639 if (((ctxt->depth > 40) &&
2640 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2641 (ctxt->depth > 1024)) {
2642 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2643 return(NULL);
2644 }
2645
2646 /*
2647 * allocate a translation buffer.
2648 */
2649 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2650 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2651 if (buffer == NULL) goto mem_error;
2652
2653 /*
2654 * OK loop until we reach one of the ending char or a size limit.
2655 * we are operating on already parsed values.
2656 */
2657 if (str < last)
2658 c = CUR_SCHAR(str, l);
2659 else
2660 c = 0;
2661 while ((c != 0) && (c != end) && /* non input consuming loop */
2662 (c != end2) && (c != end3)) {
2663
2664 if (c == 0) break;
2665 if ((c == '&') && (str[1] == '#')) {
2666 int val = xmlParseStringCharRef(ctxt, &str);
2667 if (val != 0) {
2668 COPY_BUF(0,buffer,nbchars,val);
2669 }
2670 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2671 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2672 }
2673 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2674 if (xmlParserDebugEntities)
2675 xmlGenericError(xmlGenericErrorContext,
2676 "String decoding Entity Reference: %.30s\n",
2677 str);
2678 ent = xmlParseStringEntityRef(ctxt, &str);
2679 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2680 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2681 goto int_error;
2682 if (ent != NULL)
2683 ctxt->nbentities += ent->checked;
2684 if ((ent != NULL) &&
2685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2686 if (ent->content != NULL) {
2687 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2688 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2689 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2690 }
2691 } else {
2692 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2693 "predefined entity has no content\n");
2694 }
2695 } else if ((ent != NULL) && (ent->content != NULL)) {
2696 ctxt->depth++;
2697 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2698 0, 0, 0);
2699 ctxt->depth--;
2700
2701 if (rep != NULL) {
2702 current = rep;
2703 while (*current != 0) { /* non input consuming loop */
2704 buffer[nbchars++] = *current++;
2705 if (nbchars >
2706 buffer_size - XML_PARSER_BUFFER_SIZE) {
2707 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2708 goto int_error;
2709 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2710 }
2711 }
2712 xmlFree(rep);
2713 rep = NULL;
2714 }
2715 } else if (ent != NULL) {
2716 int i = xmlStrlen(ent->name);
2717 const xmlChar *cur = ent->name;
2718
2719 buffer[nbchars++] = '&';
2720 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2721 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2722 }
2723 for (;i > 0;i--)
2724 buffer[nbchars++] = *cur++;
2725 buffer[nbchars++] = ';';
2726 }
2727 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2728 if (xmlParserDebugEntities)
2729 xmlGenericError(xmlGenericErrorContext,
2730 "String decoding PE Reference: %.30s\n", str);
2731 ent = xmlParseStringPEReference(ctxt, &str);
2732 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2733 goto int_error;
2734 if (ent != NULL)
2735 ctxt->nbentities += ent->checked;
2736 if (ent != NULL) {
2737 if (ent->content == NULL) {
2738 xmlLoadEntityContent(ctxt, ent);
2739 }
2740 ctxt->depth++;
2741 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2742 0, 0, 0);
2743 ctxt->depth--;
2744 if (rep != NULL) {
2745 current = rep;
2746 while (*current != 0) { /* non input consuming loop */
2747 buffer[nbchars++] = *current++;
2748 if (nbchars >
2749 buffer_size - XML_PARSER_BUFFER_SIZE) {
2750 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2751 goto int_error;
2752 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2753 }
2754 }
2755 xmlFree(rep);
2756 rep = NULL;
2757 }
2758 }
2759 } else {
2760 COPY_BUF(l,buffer,nbchars,c);
2761 str += l;
2762 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2763 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2764 }
2765 }
2766 if (str < last)
2767 c = CUR_SCHAR(str, l);
2768 else
2769 c = 0;
2770 }
2771 buffer[nbchars] = 0;
2772 return(buffer);
2773
2774 mem_error:
2775 xmlErrMemory(ctxt, NULL);
2776 int_error:
2777 if (rep != NULL)
2778 xmlFree(rep);
2779 if (buffer != NULL)
2780 xmlFree(buffer);
2781 return(NULL);
2782 }
2783
2784 /**
2785 * xmlStringDecodeEntities:
2786 * @ctxt: the parser context
2787 * @str: the input string
2788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789 * @end: an end marker xmlChar, 0 if none
2790 * @end2: an end marker xmlChar, 0 if none
2791 * @end3: an end marker xmlChar, 0 if none
2792 *
2793 * Takes a entity string content and process to do the adequate substitutions.
2794 *
2795 * [67] Reference ::= EntityRef | CharRef
2796 *
2797 * [69] PEReference ::= '%' Name ';'
2798 *
2799 * Returns A newly allocated string with the substitution done. The caller
2800 * must deallocate it !
2801 */
2802 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what,xmlChar end,xmlChar end2,xmlChar end3)2803 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804 xmlChar end, xmlChar end2, xmlChar end3) {
2805 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2807 end, end2, end3));
2808 }
2809
2810 /************************************************************************
2811 * *
2812 * Commodity functions, cleanup needed ? *
2813 * *
2814 ************************************************************************/
2815
2816 /**
2817 * areBlanks:
2818 * @ctxt: an XML parser context
2819 * @str: a xmlChar *
2820 * @len: the size of @str
2821 * @blank_chars: we know the chars are blanks
2822 *
2823 * Is this a sequence of blank chars that one can ignore ?
2824 *
2825 * Returns 1 if ignorable 0 otherwise.
2826 */
2827
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2828 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2829 int blank_chars) {
2830 int i, ret;
2831 xmlNodePtr lastChild;
2832
2833 /*
2834 * Don't spend time trying to differentiate them, the same callback is
2835 * used !
2836 */
2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2838 return(0);
2839
2840 /*
2841 * Check for xml:space value.
2842 */
2843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844 (*(ctxt->space) == -2))
2845 return(0);
2846
2847 /*
2848 * Check that the string is made of blanks
2849 */
2850 if (blank_chars == 0) {
2851 for (i = 0;i < len;i++)
2852 if (!(IS_BLANK_CH(str[i]))) return(0);
2853 }
2854
2855 /*
2856 * Look if the element is mixed content in the DTD if available
2857 */
2858 if (ctxt->node == NULL) return(0);
2859 if (ctxt->myDoc != NULL) {
2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861 if (ret == 0) return(1);
2862 if (ret == 1) return(0);
2863 }
2864
2865 /*
2866 * Otherwise, heuristic :-\
2867 */
2868 if ((RAW != '<') && (RAW != 0xD)) return(0);
2869 if ((ctxt->node->children == NULL) &&
2870 (RAW == '<') && (NXT(1) == '/')) return(0);
2871
2872 lastChild = xmlGetLastChild(ctxt->node);
2873 if (lastChild == NULL) {
2874 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875 (ctxt->node->content != NULL)) return(0);
2876 } else if (xmlNodeIsText(lastChild))
2877 return(0);
2878 else if ((ctxt->node->children != NULL) &&
2879 (xmlNodeIsText(ctxt->node->children)))
2880 return(0);
2881 return(1);
2882 }
2883
2884 /************************************************************************
2885 * *
2886 * Extra stuff for namespace support *
2887 * Relates to http://www.w3.org/TR/WD-xml-names *
2888 * *
2889 ************************************************************************/
2890
2891 /**
2892 * xmlSplitQName:
2893 * @ctxt: an XML parser context
2894 * @name: an XML parser context
2895 * @prefix: a xmlChar **
2896 *
2897 * parse an UTF8 encoded XML qualified name string
2898 *
2899 * [NS 5] QName ::= (Prefix ':')? LocalPart
2900 *
2901 * [NS 6] Prefix ::= NCName
2902 *
2903 * [NS 7] LocalPart ::= NCName
2904 *
2905 * Returns the local part, and prefix is updated
2906 * to get the Prefix if any.
2907 */
2908
2909 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefix)2910 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911 xmlChar buf[XML_MAX_NAMELEN + 5];
2912 xmlChar *buffer = NULL;
2913 int len = 0;
2914 int max = XML_MAX_NAMELEN;
2915 xmlChar *ret = NULL;
2916 const xmlChar *cur = name;
2917 int c;
2918
2919 if (prefix == NULL) return(NULL);
2920 *prefix = NULL;
2921
2922 if (cur == NULL) return(NULL);
2923
2924 #ifndef XML_XML_NAMESPACE
2925 /* xml: prefix is not really a namespace */
2926 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927 (cur[2] == 'l') && (cur[3] == ':'))
2928 return(xmlStrdup(name));
2929 #endif
2930
2931 /* nasty but well=formed */
2932 if (cur[0] == ':')
2933 return(xmlStrdup(name));
2934
2935 c = *cur++;
2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2937 buf[len++] = c;
2938 c = *cur++;
2939 }
2940 if (len >= max) {
2941 /*
2942 * Okay someone managed to make a huge name, so he's ready to pay
2943 * for the processing speed.
2944 */
2945 max = len * 2;
2946
2947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2948 if (buffer == NULL) {
2949 xmlErrMemory(ctxt, NULL);
2950 return(NULL);
2951 }
2952 memcpy(buffer, buf, len);
2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954 if (len + 10 > max) {
2955 xmlChar *tmp;
2956
2957 max *= 2;
2958 tmp = (xmlChar *) xmlRealloc(buffer,
2959 max * sizeof(xmlChar));
2960 if (tmp == NULL) {
2961 xmlFree(buffer);
2962 xmlErrMemory(ctxt, NULL);
2963 return(NULL);
2964 }
2965 buffer = tmp;
2966 }
2967 buffer[len++] = c;
2968 c = *cur++;
2969 }
2970 buffer[len] = 0;
2971 }
2972
2973 if ((c == ':') && (*cur == 0)) {
2974 if (buffer != NULL)
2975 xmlFree(buffer);
2976 *prefix = NULL;
2977 return(xmlStrdup(name));
2978 }
2979
2980 if (buffer == NULL)
2981 ret = xmlStrndup(buf, len);
2982 else {
2983 ret = buffer;
2984 buffer = NULL;
2985 max = XML_MAX_NAMELEN;
2986 }
2987
2988
2989 if (c == ':') {
2990 c = *cur;
2991 *prefix = ret;
2992 if (c == 0) {
2993 return(xmlStrndup(BAD_CAST "", 0));
2994 }
2995 len = 0;
2996
2997 /*
2998 * Check that the first character is proper to start
2999 * a new name
3000 */
3001 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002 ((c >= 0x41) && (c <= 0x5A)) ||
3003 (c == '_') || (c == ':'))) {
3004 int l;
3005 int first = CUR_SCHAR(cur, l);
3006
3007 if (!IS_LETTER(first) && (first != '_')) {
3008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3009 "Name %s is not XML Namespace compliant\n",
3010 name);
3011 }
3012 }
3013 cur++;
3014
3015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3016 buf[len++] = c;
3017 c = *cur++;
3018 }
3019 if (len >= max) {
3020 /*
3021 * Okay someone managed to make a huge name, so he's ready to pay
3022 * for the processing speed.
3023 */
3024 max = len * 2;
3025
3026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3027 if (buffer == NULL) {
3028 xmlErrMemory(ctxt, NULL);
3029 return(NULL);
3030 }
3031 memcpy(buffer, buf, len);
3032 while (c != 0) { /* tested bigname2.xml */
3033 if (len + 10 > max) {
3034 xmlChar *tmp;
3035
3036 max *= 2;
3037 tmp = (xmlChar *) xmlRealloc(buffer,
3038 max * sizeof(xmlChar));
3039 if (tmp == NULL) {
3040 xmlErrMemory(ctxt, NULL);
3041 xmlFree(buffer);
3042 return(NULL);
3043 }
3044 buffer = tmp;
3045 }
3046 buffer[len++] = c;
3047 c = *cur++;
3048 }
3049 buffer[len] = 0;
3050 }
3051
3052 if (buffer == NULL)
3053 ret = xmlStrndup(buf, len);
3054 else {
3055 ret = buffer;
3056 }
3057 }
3058
3059 return(ret);
3060 }
3061
3062 /************************************************************************
3063 * *
3064 * The parser itself *
3065 * Relates to http://www.w3.org/TR/REC-xml *
3066 * *
3067 ************************************************************************/
3068
3069 /************************************************************************
3070 * *
3071 * Routines to parse Name, NCName and NmToken *
3072 * *
3073 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Objects with static
 * storage duration start at zero, so no explicit initializer is needed.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNmToken;
static unsigned long nbParseStringName;
#endif
3082
3083 /*
3084 * The two following functions are related to the change of accepted
3085 * characters for Name and NmToken in the Revision 5 of XML-1.0
3086 * They correspond to the modified production [4] and the new production [4a]
3087 * changes in that revision. Also note that the macros used for the
3088 * productions Letter, Digit, CombiningChar and Extender are not needed
3089 * anymore.
3090 * We still keep compatibility to pre-revision5 parsing semantic if the
3091 * new XML_PARSE_OLD10 option is given to the parser.
3092 */
3093 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3094 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3096 /*
3097 * Use the new checks of production [4] [4a] amd [5] of the
3098 * Update 5 of XML-1.0
3099 */
3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101 (((c >= 'a') && (c <= 'z')) ||
3102 ((c >= 'A') && (c <= 'Z')) ||
3103 (c == '_') || (c == ':') ||
3104 ((c >= 0xC0) && (c <= 0xD6)) ||
3105 ((c >= 0xD8) && (c <= 0xF6)) ||
3106 ((c >= 0xF8) && (c <= 0x2FF)) ||
3107 ((c >= 0x370) && (c <= 0x37D)) ||
3108 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109 ((c >= 0x200C) && (c <= 0x200D)) ||
3110 ((c >= 0x2070) && (c <= 0x218F)) ||
3111 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115 ((c >= 0x10000) && (c <= 0xEFFFF))))
3116 return(1);
3117 } else {
3118 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3119 return(1);
3120 }
3121 return(0);
3122 }
3123
3124 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3125 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3127 /*
3128 * Use the new checks of production [4] [4a] amd [5] of the
3129 * Update 5 of XML-1.0
3130 */
3131 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132 (((c >= 'a') && (c <= 'z')) ||
3133 ((c >= 'A') && (c <= 'Z')) ||
3134 ((c >= '0') && (c <= '9')) || /* !start */
3135 (c == '_') || (c == ':') ||
3136 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137 ((c >= 0xC0) && (c <= 0xD6)) ||
3138 ((c >= 0xD8) && (c <= 0xF6)) ||
3139 ((c >= 0xF8) && (c <= 0x2FF)) ||
3140 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141 ((c >= 0x370) && (c <= 0x37D)) ||
3142 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 ((c >= 0x200C) && (c <= 0x200D)) ||
3144 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145 ((c >= 0x2070) && (c <= 0x218F)) ||
3146 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150 ((c >= 0x10000) && (c <= 0xEFFFF))))
3151 return(1);
3152 } else {
3153 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154 (c == '.') || (c == '-') ||
3155 (c == '_') || (c == ':') ||
3156 (IS_COMBINING(c)) ||
3157 (IS_EXTENDER(c)))
3158 return(1);
3159 }
3160 return(0);
3161 }
3162
3163 static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3164 int *len, int *alloc, int normalize);
3165
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * General-case Name parser, used by xmlParseName() when its ASCII
 * accelerator does not apply. Characters are read one at a time with
 * CUR_CHAR/NEXTL and the total consumed length is accumulated in len.
 * Which character classes are accepted depends on XML_PARSE_OLD10 in
 * ctxt->options.
 *
 * Returns the name looked up in ctxt->dict, or NULL if the current
 * character cannot start a Name.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;
    int c;
    int count = 0;

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        /* reject anything that is not a NameStartChar */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        /* consume NameChars; lines tagged "!start" are not valid as first char */
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* invoke GROW only once every ~100 characters */
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* XML_PARSE_OLD10 set: use the Letter/Digit/Combining/Extender classes */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /*
     * When the cursor sits on a '\n' directly preceded by '\r', the name
     * start is taken one byte further back.
     * NOTE(review): presumably CUR_CHAR stepped over the '\r' of a CR/LF
     * pair without that byte being counted in len -- confirm against the
     * macro definition.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
3265
3266 /**
3267 * xmlParseName:
3268 * @ctxt: an XML parser context
3269 *
3270 * parse an XML name.
3271 *
3272 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273 * CombiningChar | Extender
3274 *
3275 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276 *
3277 * [6] Names ::= Name (#x20 Name)*
3278 *
3279 * Returns the Name parsed or NULL
3280 */
3281
3282 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3283 xmlParseName(xmlParserCtxtPtr ctxt) {
3284 const xmlChar *in;
3285 const xmlChar *ret;
3286 int count = 0;
3287
3288 GROW;
3289
3290 #ifdef DEBUG
3291 nbParseName++;
3292 #endif
3293
3294 /*
3295 * Accelerator for simple ASCII names
3296 */
3297 in = ctxt->input->cur;
3298 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3299 ((*in >= 0x41) && (*in <= 0x5A)) ||
3300 (*in == '_') || (*in == ':')) {
3301 in++;
3302 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 ((*in >= 0x30) && (*in <= 0x39)) ||
3305 (*in == '_') || (*in == '-') ||
3306 (*in == ':') || (*in == '.'))
3307 in++;
3308 if ((*in > 0) && (*in < 0x80)) {
3309 count = in - ctxt->input->cur;
3310 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3311 ctxt->input->cur = in;
3312 ctxt->nbChars += count;
3313 ctxt->input->col += count;
3314 if (ret == NULL)
3315 xmlErrMemory(ctxt, NULL);
3316 return(ret);
3317 }
3318 }
3319 /* accelerator for special cases */
3320 return(xmlParseNameComplex(ctxt));
3321 }
3322
3323 static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3324 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3325 int len = 0, l;
3326 int c;
3327 int count = 0;
3328
3329 #ifdef DEBUG
3330 nbParseNCNameComplex++;
3331 #endif
3332
3333 /*
3334 * Handler for more complex cases
3335 */
3336 GROW;
3337 c = CUR_CHAR(l);
3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3340 return(NULL);
3341 }
3342
3343 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3344 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3345 if (count++ > 100) {
3346 count = 0;
3347 GROW;
3348 }
3349 len += l;
3350 NEXTL(l);
3351 c = CUR_CHAR(l);
3352 }
3353 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3354 }
3355
3356 /**
3357 * xmlParseNCName:
3358 * @ctxt: an XML parser context
3359 * @len: lenght of the string parsed
3360 *
3361 * parse an XML name.
3362 *
3363 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3364 * CombiningChar | Extender
3365 *
3366 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3367 *
3368 * Returns the Name parsed or NULL
3369 */
3370
3371 static const xmlChar *
xmlParseNCName(xmlParserCtxtPtr ctxt)3372 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3373 const xmlChar *in;
3374 const xmlChar *ret;
3375 int count = 0;
3376
3377 #ifdef DEBUG
3378 nbParseNCName++;
3379 #endif
3380
3381 /*
3382 * Accelerator for simple ASCII names
3383 */
3384 in = ctxt->input->cur;
3385 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3386 ((*in >= 0x41) && (*in <= 0x5A)) ||
3387 (*in == '_')) {
3388 in++;
3389 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 ((*in >= 0x30) && (*in <= 0x39)) ||
3392 (*in == '_') || (*in == '-') ||
3393 (*in == '.'))
3394 in++;
3395 if ((*in > 0) && (*in < 0x80)) {
3396 count = in - ctxt->input->cur;
3397 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3398 ctxt->input->cur = in;
3399 ctxt->nbChars += count;
3400 ctxt->input->col += count;
3401 if (ret == NULL) {
3402 xmlErrMemory(ctxt, NULL);
3403 }
3404 return(ret);
3405 }
3406 }
3407 return(xmlParseNCNameComplex(ctxt));
3408 }
3409
3410 /**
3411 * xmlParseNameAndCompare:
3412 * @ctxt: an XML parser context
3413 *
3414 * parse an XML name and compares for match
3415 * (specialized for endtag parsing)
3416 *
3417 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3418 * and the name for mismatch
3419 */
3420
3421 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3422 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3423 register const xmlChar *cmp = other;
3424 register const xmlChar *in;
3425 const xmlChar *ret;
3426
3427 GROW;
3428
3429 in = ctxt->input->cur;
3430 while (*in != 0 && *in == *cmp) {
3431 ++in;
3432 ++cmp;
3433 ctxt->input->col++;
3434 }
3435 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3436 /* success */
3437 ctxt->input->cur = in;
3438 return (const xmlChar*) 1;
3439 }
3440 /* failure (or end of input buffer), check with full function */
3441 ret = xmlParseName (ctxt);
3442 /* strings coming from the dictionnary direct compare possible */
3443 if (ret == other) {
3444 return (const xmlChar*) 1;
3445 }
3446 return ret;
3447 }
3448
3449 /**
3450 * xmlParseStringName:
3451 * @ctxt: an XML parser context
3452 * @str: a pointer to the string pointer (IN/OUT)
3453 *
3454 * parse an XML name.
3455 *
3456 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457 * CombiningChar | Extender
3458 *
3459 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460 *
3461 * [6] Names ::= Name (#x20 Name)*
3462 *
3463 * Returns the Name parsed or NULL. The @str pointer
3464 * is updated to the current location in the string.
3465 */
3466
3467 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3468 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469 xmlChar buf[XML_MAX_NAMELEN + 5];
3470 const xmlChar *cur = *str;
3471 int len = 0, l;
3472 int c;
3473
3474 #ifdef DEBUG
3475 nbParseStringName++;
3476 #endif
3477
3478 c = CUR_SCHAR(cur, l);
3479 if (!xmlIsNameStartChar(ctxt, c)) {
3480 return(NULL);
3481 }
3482
3483 COPY_BUF(l,buf,len,c);
3484 cur += l;
3485 c = CUR_SCHAR(cur, l);
3486 while (xmlIsNameChar(ctxt, c)) {
3487 COPY_BUF(l,buf,len,c);
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3491 /*
3492 * Okay someone managed to make a huge name, so he's ready to pay
3493 * for the processing speed.
3494 */
3495 xmlChar *buffer;
3496 int max = len * 2;
3497
3498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3499 if (buffer == NULL) {
3500 xmlErrMemory(ctxt, NULL);
3501 return(NULL);
3502 }
3503 memcpy(buffer, buf, len);
3504 while (xmlIsNameChar(ctxt, c)) {
3505 if (len + 10 > max) {
3506 xmlChar *tmp;
3507 max *= 2;
3508 tmp = (xmlChar *) xmlRealloc(buffer,
3509 max * sizeof(xmlChar));
3510 if (tmp == NULL) {
3511 xmlErrMemory(ctxt, NULL);
3512 xmlFree(buffer);
3513 return(NULL);
3514 }
3515 buffer = tmp;
3516 }
3517 COPY_BUF(l,buffer,len,c);
3518 cur += l;
3519 c = CUR_SCHAR(cur, l);
3520 }
3521 buffer[len] = 0;
3522 *str = cur;
3523 return(buffer);
3524 }
3525 }
3526 *str = cur;
3527 return(xmlStrndup(buf, len));
3528 }
3529
3530 /**
3531 * xmlParseNmtoken:
3532 * @ctxt: an XML parser context
3533 *
3534 * parse an XML Nmtoken.
3535 *
3536 * [7] Nmtoken ::= (NameChar)+
3537 *
3538 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3539 *
3540 * Returns the Nmtoken parsed or NULL
3541 */
3542
3543 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3544 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545 xmlChar buf[XML_MAX_NAMELEN + 5];
3546 int len = 0, l;
3547 int c;
3548 int count = 0;
3549
3550 #ifdef DEBUG
3551 nbParseNmToken++;
3552 #endif
3553
3554 GROW;
3555 c = CUR_CHAR(l);
3556
3557 while (xmlIsNameChar(ctxt, c)) {
3558 if (count++ > 100) {
3559 count = 0;
3560 GROW;
3561 }
3562 COPY_BUF(l,buf,len,c);
3563 NEXTL(l);
3564 c = CUR_CHAR(l);
3565 if (len >= XML_MAX_NAMELEN) {
3566 /*
3567 * Okay someone managed to make a huge token, so he's ready to pay
3568 * for the processing speed.
3569 */
3570 xmlChar *buffer;
3571 int max = len * 2;
3572
3573 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3574 if (buffer == NULL) {
3575 xmlErrMemory(ctxt, NULL);
3576 return(NULL);
3577 }
3578 memcpy(buffer, buf, len);
3579 while (xmlIsNameChar(ctxt, c)) {
3580 if (count++ > 100) {
3581 count = 0;
3582 GROW;
3583 }
3584 if (len + 10 > max) {
3585 xmlChar *tmp;
3586
3587 max *= 2;
3588 tmp = (xmlChar *) xmlRealloc(buffer,
3589 max * sizeof(xmlChar));
3590 if (tmp == NULL) {
3591 xmlErrMemory(ctxt, NULL);
3592 xmlFree(buffer);
3593 return(NULL);
3594 }
3595 buffer = tmp;
3596 }
3597 COPY_BUF(l,buffer,len,c);
3598 NEXTL(l);
3599 c = CUR_CHAR(l);
3600 }
3601 buffer[len] = 0;
3602 return(buffer);
3603 }
3604 }
3605 if (len == 0)
3606 return(NULL);
3607 return(xmlStrndup(buf, len));
3608 }
3609
3610 /**
3611 * xmlParseEntityValue:
3612 * @ctxt: an XML parser context
3613 * @orig: if non-NULL store a copy of the original entity value
3614 *
3615 * parse a value for ENTITY declarations
3616 *
3617 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618 * "'" ([^%&'] | PEReference | Reference)* "'"
3619 *
3620 * Returns the EntityValue parsed with reference substituted or NULL
3621 */
3622
3623 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3624 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625 xmlChar *buf = NULL;
3626 int len = 0;
3627 int size = XML_PARSER_BUFFER_SIZE;
3628 int c, l;
3629 xmlChar stop;
3630 xmlChar *ret = NULL;
3631 const xmlChar *cur = NULL;
3632 xmlParserInputPtr input;
3633
3634 if (RAW == '"') stop = '"';
3635 else if (RAW == '\'') stop = '\'';
3636 else {
3637 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3638 return(NULL);
3639 }
3640 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3641 if (buf == NULL) {
3642 xmlErrMemory(ctxt, NULL);
3643 return(NULL);
3644 }
3645
3646 /*
3647 * The content of the entity definition is copied in a buffer.
3648 */
3649
3650 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651 input = ctxt->input;
3652 GROW;
3653 NEXT;
3654 c = CUR_CHAR(l);
3655 /*
3656 * NOTE: 4.4.5 Included in Literal
3657 * When a parameter entity reference appears in a literal entity
3658 * value, ... a single or double quote character in the replacement
3659 * text is always treated as a normal data character and will not
3660 * terminate the literal.
3661 * In practice it means we stop the loop only when back at parsing
3662 * the initial entity and the quote is found
3663 */
3664 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3665 (ctxt->input != input))) {
3666 if (len + 5 >= size) {
3667 xmlChar *tmp;
3668
3669 size *= 2;
3670 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3671 if (tmp == NULL) {
3672 xmlErrMemory(ctxt, NULL);
3673 xmlFree(buf);
3674 return(NULL);
3675 }
3676 buf = tmp;
3677 }
3678 COPY_BUF(l,buf,len,c);
3679 NEXTL(l);
3680 /*
3681 * Pop-up of finished entities.
3682 */
3683 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3684 xmlPopInput(ctxt);
3685
3686 GROW;
3687 c = CUR_CHAR(l);
3688 if (c == 0) {
3689 GROW;
3690 c = CUR_CHAR(l);
3691 }
3692 }
3693 buf[len] = 0;
3694
3695 /*
3696 * Raise problem w.r.t. '&' and '%' being used in non-entities
3697 * reference constructs. Note Charref will be handled in
3698 * xmlStringDecodeEntities()
3699 */
3700 cur = buf;
3701 while (*cur != 0) { /* non input consuming */
3702 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3703 xmlChar *name;
3704 xmlChar tmp = *cur;
3705
3706 cur++;
3707 name = xmlParseStringName(ctxt, &cur);
3708 if ((name == NULL) || (*cur != ';')) {
3709 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3710 "EntityValue: '%c' forbidden except for entities references\n",
3711 tmp);
3712 }
3713 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714 (ctxt->inputNr == 1)) {
3715 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3716 }
3717 if (name != NULL)
3718 xmlFree(name);
3719 if (*cur == 0)
3720 break;
3721 }
3722 cur++;
3723 }
3724
3725 /*
3726 * Then PEReference entities are substituted.
3727 */
3728 if (c != stop) {
3729 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3730 xmlFree(buf);
3731 } else {
3732 NEXT;
3733 /*
3734 * NOTE: 4.4.7 Bypassed
3735 * When a general entity reference appears in the EntityValue in
3736 * an entity declaration, it is bypassed and left as is.
3737 * so XML_SUBSTITUTE_REF is not set here.
3738 */
3739 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3740 0, 0, 0);
3741 if (orig != NULL)
3742 *orig = buf;
3743 else
3744 xmlFree(buf);
3745 }
3746
3747 return(ret);
3748 }
3749
3750 /**
3751 * xmlParseAttValueComplex:
3752 * @ctxt: an XML parser context
3753 * @len: the resulting attribute len
3754 * @normalize: wether to apply the inner normalization
3755 *
3756 * parse a value for an attribute, this is the fallback function
3757 * of xmlParseAttValue() when the attribute parsing requires handling
3758 * of non-ASCII characters, or normalization compaction.
3759 *
3760 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761 */
3762 static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt,int * attlen,int normalize)3763 xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3764 xmlChar limit = 0;
3765 xmlChar *buf = NULL;
3766 xmlChar *rep = NULL;
3767 int len = 0;
3768 int buf_size = 0;
3769 int c, l, in_space = 0;
3770 xmlChar *current = NULL;
3771 xmlEntityPtr ent;
3772
3773 if (NXT(0) == '"') {
3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3775 limit = '"';
3776 NEXT;
3777 } else if (NXT(0) == '\'') {
3778 limit = '\'';
3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3780 NEXT;
3781 } else {
3782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3783 return(NULL);
3784 }
3785
3786 /*
3787 * allocate a translation buffer.
3788 */
3789 buf_size = XML_PARSER_BUFFER_SIZE;
3790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3791 if (buf == NULL) goto mem_error;
3792
3793 /*
3794 * OK loop until we reach one of the ending char or a size limit.
3795 */
3796 c = CUR_CHAR(l);
3797 while ((NXT(0) != limit) && /* checked */
3798 (IS_CHAR(c)) && (c != '<')) {
3799 if (c == 0) break;
3800 if (c == '&') {
3801 in_space = 0;
3802 if (NXT(1) == '#') {
3803 int val = xmlParseCharRef(ctxt);
3804
3805 if (val == '&') {
3806 if (ctxt->replaceEntities) {
3807 if (len > buf_size - 10) {
3808 growBuffer(buf, 10);
3809 }
3810 buf[len++] = '&';
3811 } else {
3812 /*
3813 * The reparsing will be done in xmlStringGetNodeList()
3814 * called by the attribute() function in SAX.c
3815 */
3816 if (len > buf_size - 10) {
3817 growBuffer(buf, 10);
3818 }
3819 buf[len++] = '&';
3820 buf[len++] = '#';
3821 buf[len++] = '3';
3822 buf[len++] = '8';
3823 buf[len++] = ';';
3824 }
3825 } else if (val != 0) {
3826 if (len > buf_size - 10) {
3827 growBuffer(buf, 10);
3828 }
3829 len += xmlCopyChar(0, &buf[len], val);
3830 }
3831 } else {
3832 ent = xmlParseEntityRef(ctxt);
3833 ctxt->nbentities++;
3834 if (ent != NULL)
3835 ctxt->nbentities += ent->owner;
3836 if ((ent != NULL) &&
3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838 if (len > buf_size - 10) {
3839 growBuffer(buf, 10);
3840 }
3841 if ((ctxt->replaceEntities == 0) &&
3842 (ent->content[0] == '&')) {
3843 buf[len++] = '&';
3844 buf[len++] = '#';
3845 buf[len++] = '3';
3846 buf[len++] = '8';
3847 buf[len++] = ';';
3848 } else {
3849 buf[len++] = ent->content[0];
3850 }
3851 } else if ((ent != NULL) &&
3852 (ctxt->replaceEntities != 0)) {
3853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854 rep = xmlStringDecodeEntities(ctxt, ent->content,
3855 XML_SUBSTITUTE_REF,
3856 0, 0, 0);
3857 if (rep != NULL) {
3858 current = rep;
3859 while (*current != 0) { /* non input consuming */
3860 if ((*current == 0xD) || (*current == 0xA) ||
3861 (*current == 0x9)) {
3862 buf[len++] = 0x20;
3863 current++;
3864 } else
3865 buf[len++] = *current++;
3866 if (len > buf_size - 10) {
3867 growBuffer(buf, 10);
3868 }
3869 }
3870 xmlFree(rep);
3871 rep = NULL;
3872 }
3873 } else {
3874 if (len > buf_size - 10) {
3875 growBuffer(buf, 10);
3876 }
3877 if (ent->content != NULL)
3878 buf[len++] = ent->content[0];
3879 }
3880 } else if (ent != NULL) {
3881 int i = xmlStrlen(ent->name);
3882 const xmlChar *cur = ent->name;
3883
3884 /*
3885 * This may look absurd but is needed to detect
3886 * entities problems
3887 */
3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889 (ent->content != NULL)) {
3890 rep = xmlStringDecodeEntities(ctxt, ent->content,
3891 XML_SUBSTITUTE_REF, 0, 0, 0);
3892 if (rep != NULL) {
3893 xmlFree(rep);
3894 rep = NULL;
3895 }
3896 }
3897
3898 /*
3899 * Just output the reference
3900 */
3901 buf[len++] = '&';
3902 while (len > buf_size - i - 10) {
3903 growBuffer(buf, i + 10);
3904 }
3905 for (;i > 0;i--)
3906 buf[len++] = *cur++;
3907 buf[len++] = ';';
3908 }
3909 }
3910 } else {
3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3912 if ((len != 0) || (!normalize)) {
3913 if ((!normalize) || (!in_space)) {
3914 COPY_BUF(l,buf,len,0x20);
3915 while (len > buf_size - 10) {
3916 growBuffer(buf, 10);
3917 }
3918 }
3919 in_space = 1;
3920 }
3921 } else {
3922 in_space = 0;
3923 COPY_BUF(l,buf,len,c);
3924 if (len > buf_size - 10) {
3925 growBuffer(buf, 10);
3926 }
3927 }
3928 NEXTL(l);
3929 }
3930 GROW;
3931 c = CUR_CHAR(l);
3932 }
3933 if ((in_space) && (normalize)) {
3934 while (buf[len - 1] == 0x20) len--;
3935 }
3936 buf[len] = 0;
3937 if (RAW == '<') {
3938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3939 } else if (RAW != limit) {
3940 if ((c != 0) && (!IS_CHAR(c))) {
3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942 "invalid character in attribute value\n");
3943 } else {
3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945 "AttValue: ' expected\n");
3946 }
3947 } else
3948 NEXT;
3949 if (attlen != NULL) *attlen = len;
3950 return(buf);
3951
3952 mem_error:
3953 xmlErrMemory(ctxt, NULL);
3954 if (buf != NULL)
3955 xmlFree(buf);
3956 if (rep != NULL)
3957 xmlFree(rep);
3958 return(NULL);
3959 }
3960
3961 /**
3962 * xmlParseAttValue:
3963 * @ctxt: an XML parser context
3964 *
3965 * parse a value for an attribute
3966 * Note: the parser won't do substitution of entities here, this
3967 * will be handled later in xmlStringGetNodeList
3968 *
3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3970 * "'" ([^<&'] | Reference)* "'"
3971 *
3972 * 3.3.3 Attribute-Value Normalization:
3973 * Before the value of an attribute is passed to the application or
3974 * checked for validity, the XML processor must normalize it as follows:
3975 * - a character reference is processed by appending the referenced
3976 * character to the attribute value
3977 * - an entity reference is processed by recursively processing the
3978 * replacement text of the entity
3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3980 * appending #x20 to the normalized value, except that only a single
3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3982 * parsed entity or the literal entity value of an internal parsed entity
3983 * - other characters are processed by appending them to the normalized value
3984 * If the declared value is not CDATA, then the XML processor must further
3985 * process the normalized attribute value by discarding any leading and
3986 * trailing space (#x20) characters, and by replacing sequences of space
3987 * (#x20) characters by a single space (#x20) character.
3988 * All attributes for which no declaration has been read should be treated
3989 * by a non-validating parser as if declared CDATA.
3990 *
3991 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3992 */
3993
3994
3995 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)3996 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3997 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3998 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3999 }
4000
4001 /**
4002 * xmlParseSystemLiteral:
4003 * @ctxt: an XML parser context
4004 *
4005 * parse an XML Literal
4006 *
4007 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4008 *
4009 * Returns the SystemLiteral parsed or NULL
4010 */
4011
4012 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4013 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4014 xmlChar *buf = NULL;
4015 int len = 0;
4016 int size = XML_PARSER_BUFFER_SIZE;
4017 int cur, l;
4018 xmlChar stop;
4019 int state = ctxt->instate;
4020 int count = 0;
4021
4022 SHRINK;
4023 if (RAW == '"') {
4024 NEXT;
4025 stop = '"';
4026 } else if (RAW == '\'') {
4027 NEXT;
4028 stop = '\'';
4029 } else {
4030 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4031 return(NULL);
4032 }
4033
4034 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4035 if (buf == NULL) {
4036 xmlErrMemory(ctxt, NULL);
4037 return(NULL);
4038 }
4039 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4040 cur = CUR_CHAR(l);
4041 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4042 if (len + 5 >= size) {
4043 xmlChar *tmp;
4044
4045 size *= 2;
4046 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4047 if (tmp == NULL) {
4048 xmlFree(buf);
4049 xmlErrMemory(ctxt, NULL);
4050 ctxt->instate = (xmlParserInputState) state;
4051 return(NULL);
4052 }
4053 buf = tmp;
4054 }
4055 count++;
4056 if (count > 50) {
4057 GROW;
4058 count = 0;
4059 }
4060 COPY_BUF(l,buf,len,cur);
4061 NEXTL(l);
4062 cur = CUR_CHAR(l);
4063 if (cur == 0) {
4064 GROW;
4065 SHRINK;
4066 cur = CUR_CHAR(l);
4067 }
4068 }
4069 buf[len] = 0;
4070 ctxt->instate = (xmlParserInputState) state;
4071 if (!IS_CHAR(cur)) {
4072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4073 } else {
4074 NEXT;
4075 }
4076 return(buf);
4077 }
4078
4079 /**
4080 * xmlParsePubidLiteral:
4081 * @ctxt: an XML parser context
4082 *
4083 * parse an XML public literal
4084 *
4085 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4086 *
4087 * Returns the PubidLiteral parsed or NULL.
4088 */
4089
4090 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4091 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4093 int len = 0;
4094 int size = XML_PARSER_BUFFER_SIZE;
4095 xmlChar cur;
4096 xmlChar stop;
4097 int count = 0;
4098 xmlParserInputState oldstate = ctxt->instate;
4099
4100 SHRINK;
4101 if (RAW == '"') {
4102 NEXT;
4103 stop = '"';
4104 } else if (RAW == '\'') {
4105 NEXT;
4106 stop = '\'';
4107 } else {
4108 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4109 return(NULL);
4110 }
4111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4112 if (buf == NULL) {
4113 xmlErrMemory(ctxt, NULL);
4114 return(NULL);
4115 }
4116 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4117 cur = CUR;
4118 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4119 if (len + 1 >= size) {
4120 xmlChar *tmp;
4121
4122 size *= 2;
4123 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4124 if (tmp == NULL) {
4125 xmlErrMemory(ctxt, NULL);
4126 xmlFree(buf);
4127 return(NULL);
4128 }
4129 buf = tmp;
4130 }
4131 buf[len++] = cur;
4132 count++;
4133 if (count > 50) {
4134 GROW;
4135 count = 0;
4136 }
4137 NEXT;
4138 cur = CUR;
4139 if (cur == 0) {
4140 GROW;
4141 SHRINK;
4142 cur = CUR;
4143 }
4144 }
4145 buf[len] = 0;
4146 if (cur != stop) {
4147 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4148 } else {
4149 NEXT;
4150 }
4151 ctxt->instate = oldstate;
4152 return(buf);
4153 }
4154
/* Forward declaration: xmlParseCharData() calls this before its definition. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4156
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. A non-zero entry (the byte value itself) means
 * the inner loop of xmlParseCharData() may step over that byte; a zero
 * entry stops the loop. Zero entries are: every byte below 0x20 except
 * 0x09, the characters '&' (0x26), '<' (0x3C) and ']' (0x5D), and every
 * byte from 0x80 up.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4194
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated loop scans the input buffer in
 * place and hands runs of ASCII text to the SAX callbacks without
 * copying. Whenever that loop cannot continue (or @cdata is non-zero)
 * the work is finished by xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col written back to the input just before falling through to
     * xmlParseCharDataComplex(); refreshed after each SAX delivery below
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* step over a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* the run consisted only of spaces/line feeds: deliver it and stop */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* step over every byte flagged in test_char_data, counting columns */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error outside a CDATA section; a lone ']' is text */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the bytes between input->cur and in */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: input->cur is left on the LF so the CR is
                     * dropped from the next delivered run; scanning
                     * resumes after the LF.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    /* jumps to the loop condition, tested on the byte after the LF */
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* write the saved line/col back, then let the general parser take over */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4350
4351 /**
4352 * xmlParseCharDataComplex:
4353 * @ctxt: an XML parser context
4354 * @cdata: int indicating whether we are within a CDATA section
4355 *
4356 * parse a CharData section.this is the fallback function
4357 * of xmlParseCharData() when the parsing requires handling
4358 * of non-ASCII characters.
4359 */
4360 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int cdata)4361 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4363 int nbchar = 0;
4364 int cur, l;
4365 int count = 0;
4366
4367 SHRINK;
4368 GROW;
4369 cur = CUR_CHAR(l);
4370 while ((cur != '<') && /* checked */
4371 (cur != '&') &&
4372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4373 if ((cur == ']') && (NXT(1) == ']') &&
4374 (NXT(2) == '>')) {
4375 if (cdata) break;
4376 else {
4377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4378 }
4379 }
4380 COPY_BUF(l,buf,nbchar,cur);
4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4382 buf[nbchar] = 0;
4383
4384 /*
4385 * OK the segment is to be consumed as chars.
4386 */
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4388 if (areBlanks(ctxt, buf, nbchar, 0)) {
4389 if (ctxt->sax->ignorableWhitespace != NULL)
4390 ctxt->sax->ignorableWhitespace(ctxt->userData,
4391 buf, nbchar);
4392 } else {
4393 if (ctxt->sax->characters != NULL)
4394 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4395 if ((ctxt->sax->characters !=
4396 ctxt->sax->ignorableWhitespace) &&
4397 (*ctxt->space == -1))
4398 *ctxt->space = -2;
4399 }
4400 }
4401 nbchar = 0;
4402 /* something really bad happened in the SAX callback */
4403 if (ctxt->instate != XML_PARSER_CONTENT)
4404 return;
4405 }
4406 count++;
4407 if (count > 50) {
4408 GROW;
4409 count = 0;
4410 }
4411 NEXTL(l);
4412 cur = CUR_CHAR(l);
4413 }
4414 if (nbchar != 0) {
4415 buf[nbchar] = 0;
4416 /*
4417 * OK the segment is to be consumed as chars.
4418 */
4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4420 if (areBlanks(ctxt, buf, nbchar, 0)) {
4421 if (ctxt->sax->ignorableWhitespace != NULL)
4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4423 } else {
4424 if (ctxt->sax->characters != NULL)
4425 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427 (*ctxt->space == -1))
4428 *ctxt->space = -2;
4429 }
4430 }
4431 }
4432 if ((cur != 0) && (!IS_CHAR(cur))) {
4433 /* Generate the error and skip the offending character */
4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435 "PCDATA invalid Char value %d\n",
4436 cur);
4437 NEXTL(l);
4438 }
4439 }
4440
4441 /**
4442 * xmlParseExternalID:
4443 * @ctxt: an XML parser context
4444 * @publicID: a xmlChar** receiving PubidLiteral
4445 * @strict: indicate whether we should restrict parsing to only
4446 * production [75], see NOTE below
4447 *
4448 * Parse an External ID or a Public ID
4449 *
4450 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4451 * 'PUBLIC' S PubidLiteral S SystemLiteral
4452 *
4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4455 *
4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4457 *
4458 * Returns the function returns SystemLiteral and in the second
4459 * case publicID receives PubidLiteral, is strict is off
4460 * it is possible to return NULL and have publicID set.
4461 */
4462
4463 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)4464 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465 xmlChar *URI = NULL;
4466
4467 SHRINK;
4468
4469 *publicID = NULL;
4470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4471 SKIP(6);
4472 if (!IS_BLANK_CH(CUR)) {
4473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474 "Space required after 'SYSTEM'\n");
4475 }
4476 SKIP_BLANKS;
4477 URI = xmlParseSystemLiteral(ctxt);
4478 if (URI == NULL) {
4479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4480 }
4481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4482 SKIP(6);
4483 if (!IS_BLANK_CH(CUR)) {
4484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4485 "Space required after 'PUBLIC'\n");
4486 }
4487 SKIP_BLANKS;
4488 *publicID = xmlParsePubidLiteral(ctxt);
4489 if (*publicID == NULL) {
4490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4491 }
4492 if (strict) {
4493 /*
4494 * We don't handle [83] so "S SystemLiteral" is required.
4495 */
4496 if (!IS_BLANK_CH(CUR)) {
4497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4498 "Space required after the Public Identifier\n");
4499 }
4500 } else {
4501 /*
4502 * We handle [83] so we return immediately, if
4503 * "S SystemLiteral" is not detected. From a purely parsing
4504 * point of view that's a nice mess.
4505 */
4506 const xmlChar *ptr;
4507 GROW;
4508
4509 ptr = CUR_PTR;
4510 if (!IS_BLANK_CH(*ptr)) return(NULL);
4511
4512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4514 }
4515 SKIP_BLANKS;
4516 URI = xmlParseSystemLiteral(ctxt);
4517 if (URI == NULL) {
4518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4519 }
4520 }
4521 return(URI);
4522 }
4523
4524 /**
4525 * xmlParseCommentComplex:
4526 * @ctxt: an XML parser context
4527 * @buf: the already parsed part of the buffer
4528 * @len: number of bytes filles in the buffer
4529 * @size: allocated size of the buffer
4530 *
4531 * Skip an XML (SGML) comment <!-- .... -->
4532 * The spec says that "For compatibility, the string "--" (double-hyphen)
4533 * must not occur within comments. "
4534 * This is the slow routine in case the accelerator for ascii didn't work
4535 *
4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4537 */
4538 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,int len,int size)4539 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
4540 int q, ql;
4541 int r, rl;
4542 int cur, l;
4543 int count = 0;
4544 int inputid;
4545
4546 inputid = ctxt->input->id;
4547
4548 if (buf == NULL) {
4549 len = 0;
4550 size = XML_PARSER_BUFFER_SIZE;
4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4552 if (buf == NULL) {
4553 xmlErrMemory(ctxt, NULL);
4554 return;
4555 }
4556 }
4557 GROW; /* Assure there's enough input data */
4558 q = CUR_CHAR(ql);
4559 if (q == 0)
4560 goto not_terminated;
4561 if (!IS_CHAR(q)) {
4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563 "xmlParseComment: invalid xmlChar value %d\n",
4564 q);
4565 xmlFree (buf);
4566 return;
4567 }
4568 NEXTL(ql);
4569 r = CUR_CHAR(rl);
4570 if (r == 0)
4571 goto not_terminated;
4572 if (!IS_CHAR(r)) {
4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574 "xmlParseComment: invalid xmlChar value %d\n",
4575 q);
4576 xmlFree (buf);
4577 return;
4578 }
4579 NEXTL(rl);
4580 cur = CUR_CHAR(l);
4581 if (cur == 0)
4582 goto not_terminated;
4583 while (IS_CHAR(cur) && /* checked */
4584 ((cur != '>') ||
4585 (r != '-') || (q != '-'))) {
4586 if ((r == '-') && (q == '-')) {
4587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4588 }
4589 if (len + 5 >= size) {
4590 xmlChar *new_buf;
4591 size *= 2;
4592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4594 xmlFree (buf);
4595 xmlErrMemory(ctxt, NULL);
4596 return;
4597 }
4598 buf = new_buf;
4599 }
4600 COPY_BUF(ql,buf,len,q);
4601 q = r;
4602 ql = rl;
4603 r = cur;
4604 rl = l;
4605
4606 count++;
4607 if (count > 50) {
4608 GROW;
4609 count = 0;
4610 }
4611 NEXTL(l);
4612 cur = CUR_CHAR(l);
4613 if (cur == 0) {
4614 SHRINK;
4615 GROW;
4616 cur = CUR_CHAR(l);
4617 }
4618 }
4619 buf[len] = 0;
4620 if (cur == 0) {
4621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4622 "Comment not terminated \n<!--%.50s\n", buf);
4623 } else if (!IS_CHAR(cur)) {
4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625 "xmlParseComment: invalid xmlChar value %d\n",
4626 cur);
4627 } else {
4628 if (inputid != ctxt->input->id) {
4629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630 "Comment doesn't start and stop in the same entity\n");
4631 }
4632 NEXT;
4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634 (!ctxt->disableSAX))
4635 ctxt->sax->comment(ctxt->userData, buf);
4636 }
4637 xmlFree(buf);
4638 return;
4639 not_terminated:
4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641 "Comment not terminated\n", NULL);
4642 xmlFree(buf);
4643 return;
4644 }
4645
4646 /**
4647 * xmlParseComment:
4648 * @ctxt: an XML parser context
4649 *
4650 * Skip an XML (SGML) comment <!-- .... -->
4651 * The spec says that "For compatibility, the string "--" (double-hyphen)
4652 * must not occur within comments. "
4653 *
4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4655 */
4656 void
xmlParseComment(xmlParserCtxtPtr ctxt)4657 xmlParseComment(xmlParserCtxtPtr ctxt) {
4658 xmlChar *buf = NULL;
4659 int size = XML_PARSER_BUFFER_SIZE;
4660 int len = 0;
4661 xmlParserInputState state;
4662 const xmlChar *in;
4663 int nbchar = 0, ccol;
4664 int inputid;
4665
4666 /*
4667 * Check that there is a comment right here.
4668 */
4669 if ((RAW != '<') || (NXT(1) != '!') ||
4670 (NXT(2) != '-') || (NXT(3) != '-')) return;
4671 state = ctxt->instate;
4672 ctxt->instate = XML_PARSER_COMMENT;
4673 inputid = ctxt->input->id;
4674 SKIP(4);
4675 SHRINK;
4676 GROW;
4677
4678 /*
4679 * Accelerated common case where input don't need to be
4680 * modified before passing it to the handler.
4681 */
4682 in = ctxt->input->cur;
4683 do {
4684 if (*in == 0xA) {
4685 do {
4686 ctxt->input->line++; ctxt->input->col = 1;
4687 in++;
4688 } while (*in == 0xA);
4689 }
4690 get_more:
4691 ccol = ctxt->input->col;
4692 while (((*in > '-') && (*in <= 0x7F)) ||
4693 ((*in >= 0x20) && (*in < '-')) ||
4694 (*in == 0x09)) {
4695 in++;
4696 ccol++;
4697 }
4698 ctxt->input->col = ccol;
4699 if (*in == 0xA) {
4700 do {
4701 ctxt->input->line++; ctxt->input->col = 1;
4702 in++;
4703 } while (*in == 0xA);
4704 goto get_more;
4705 }
4706 nbchar = in - ctxt->input->cur;
4707 /*
4708 * save current set of data
4709 */
4710 if (nbchar > 0) {
4711 if ((ctxt->sax != NULL) &&
4712 (ctxt->sax->comment != NULL)) {
4713 if (buf == NULL) {
4714 if ((*in == '-') && (in[1] == '-'))
4715 size = nbchar + 1;
4716 else
4717 size = XML_PARSER_BUFFER_SIZE + nbchar;
4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4719 if (buf == NULL) {
4720 xmlErrMemory(ctxt, NULL);
4721 ctxt->instate = state;
4722 return;
4723 }
4724 len = 0;
4725 } else if (len + nbchar + 1 >= size) {
4726 xmlChar *new_buf;
4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728 new_buf = (xmlChar *) xmlRealloc(buf,
4729 size * sizeof(xmlChar));
4730 if (new_buf == NULL) {
4731 xmlFree (buf);
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4734 return;
4735 }
4736 buf = new_buf;
4737 }
4738 memcpy(&buf[len], ctxt->input->cur, nbchar);
4739 len += nbchar;
4740 buf[len] = 0;
4741 }
4742 }
4743 ctxt->input->cur = in;
4744 if (*in == 0xA) {
4745 in++;
4746 ctxt->input->line++; ctxt->input->col = 1;
4747 }
4748 if (*in == 0xD) {
4749 in++;
4750 if (*in == 0xA) {
4751 ctxt->input->cur = in;
4752 in++;
4753 ctxt->input->line++; ctxt->input->col = 1;
4754 continue; /* while */
4755 }
4756 in--;
4757 }
4758 SHRINK;
4759 GROW;
4760 in = ctxt->input->cur;
4761 if (*in == '-') {
4762 if (in[1] == '-') {
4763 if (in[2] == '>') {
4764 if (ctxt->input->id != inputid) {
4765 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766 "comment doesn't start and stop in the same entity\n");
4767 }
4768 SKIP(3);
4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770 (!ctxt->disableSAX)) {
4771 if (buf != NULL)
4772 ctxt->sax->comment(ctxt->userData, buf);
4773 else
4774 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4775 }
4776 if (buf != NULL)
4777 xmlFree(buf);
4778 ctxt->instate = state;
4779 return;
4780 }
4781 if (buf != NULL)
4782 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4783 "Comment not terminated \n<!--%.50s\n",
4784 buf);
4785 else
4786 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4787 "Comment not terminated \n", NULL);
4788 in++;
4789 ctxt->input->col++;
4790 }
4791 in++;
4792 ctxt->input->col++;
4793 goto get_more;
4794 }
4795 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4796 xmlParseCommentComplex(ctxt, buf, len, size);
4797 ctxt->instate = state;
4798 return;
4799 }
4800
4801
4802 /**
4803 * xmlParsePITarget:
4804 * @ctxt: an XML parser context
4805 *
4806 * parse the name of a PI
4807 *
4808 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4809 *
4810 * Returns the PITarget name or NULL
4811 */
4812
4813 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)4814 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4815 const xmlChar *name;
4816
4817 name = xmlParseName(ctxt);
4818 if ((name != NULL) &&
4819 ((name[0] == 'x') || (name[0] == 'X')) &&
4820 ((name[1] == 'm') || (name[1] == 'M')) &&
4821 ((name[2] == 'l') || (name[2] == 'L'))) {
4822 int i;
4823 if ((name[0] == 'x') && (name[1] == 'm') &&
4824 (name[2] == 'l') && (name[3] == 0)) {
4825 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4826 "XML declaration allowed only at the start of the document\n");
4827 return(name);
4828 } else if (name[3] == 0) {
4829 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4830 return(name);
4831 }
4832 for (i = 0;;i++) {
4833 if (xmlW3CPIs[i] == NULL) break;
4834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4835 return(name);
4836 }
4837 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4838 "xmlParsePITarget: invalid name prefix 'xml'\n",
4839 NULL, NULL);
4840 }
4841 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4842 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4843 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4844 }
4845 return(name);
4846 }
4847
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a value between matching single or double
 * quotes, optional blanks, end of string. A missing '=' makes the
 * function return silently; any other mismatch produces an
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *href = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, possibly surrounded by blanks */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    /* quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    for (start = ++p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /* nothing but blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
4909
4910 /**
4911 * xmlParsePI:
4912 * @ctxt: an XML parser context
4913 *
4914 * parse an XML Processing Instruction.
4915 *
4916 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4917 *
4918 * The processing is transfered to SAX once parsed.
4919 */
4920
4921 void
xmlParsePI(xmlParserCtxtPtr ctxt)4922 xmlParsePI(xmlParserCtxtPtr ctxt) {
4923 xmlChar *buf = NULL;
4924 int len = 0;
4925 int size = XML_PARSER_BUFFER_SIZE;
4926 int cur, l;
4927 const xmlChar *target;
4928 xmlParserInputState state;
4929 int count = 0;
4930
4931 if ((RAW == '<') && (NXT(1) == '?')) {
4932 xmlParserInputPtr input = ctxt->input;
4933 state = ctxt->instate;
4934 ctxt->instate = XML_PARSER_PI;
4935 /*
4936 * this is a Processing Instruction.
4937 */
4938 SKIP(2);
4939 SHRINK;
4940
4941 /*
4942 * Parse the target name and check for special support like
4943 * namespace.
4944 */
4945 target = xmlParsePITarget(ctxt);
4946 if (target != NULL) {
4947 if ((RAW == '?') && (NXT(1) == '>')) {
4948 if (input != ctxt->input) {
4949 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4950 "PI declaration doesn't start and stop in the same entity\n");
4951 }
4952 SKIP(2);
4953
4954 /*
4955 * SAX: PI detected.
4956 */
4957 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4958 (ctxt->sax->processingInstruction != NULL))
4959 ctxt->sax->processingInstruction(ctxt->userData,
4960 target, NULL);
4961 if (ctxt->instate != XML_PARSER_EOF)
4962 ctxt->instate = state;
4963 return;
4964 }
4965 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4966 if (buf == NULL) {
4967 xmlErrMemory(ctxt, NULL);
4968 ctxt->instate = state;
4969 return;
4970 }
4971 cur = CUR;
4972 if (!IS_BLANK(cur)) {
4973 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4974 "ParsePI: PI %s space expected\n", target);
4975 }
4976 SKIP_BLANKS;
4977 cur = CUR_CHAR(l);
4978 while (IS_CHAR(cur) && /* checked */
4979 ((cur != '?') || (NXT(1) != '>'))) {
4980 if (len + 5 >= size) {
4981 xmlChar *tmp;
4982
4983 size *= 2;
4984 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4985 if (tmp == NULL) {
4986 xmlErrMemory(ctxt, NULL);
4987 xmlFree(buf);
4988 ctxt->instate = state;
4989 return;
4990 }
4991 buf = tmp;
4992 }
4993 count++;
4994 if (count > 50) {
4995 GROW;
4996 count = 0;
4997 }
4998 COPY_BUF(l,buf,len,cur);
4999 NEXTL(l);
5000 cur = CUR_CHAR(l);
5001 if (cur == 0) {
5002 SHRINK;
5003 GROW;
5004 cur = CUR_CHAR(l);
5005 }
5006 }
5007 buf[len] = 0;
5008 if (cur != '?') {
5009 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5010 "ParsePI: PI %s never end ...\n", target);
5011 } else {
5012 if (input != ctxt->input) {
5013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5014 "PI declaration doesn't start and stop in the same entity\n");
5015 }
5016 SKIP(2);
5017
5018 #ifdef LIBXML_CATALOG_ENABLED
5019 if (((state == XML_PARSER_MISC) ||
5020 (state == XML_PARSER_START)) &&
5021 (xmlStrEqual(target, XML_CATALOG_PI))) {
5022 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5023 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5024 (allow == XML_CATA_ALLOW_ALL))
5025 xmlParseCatalogPI(ctxt, buf);
5026 }
5027 #endif
5028
5029
5030 /*
5031 * SAX: PI detected.
5032 */
5033 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5034 (ctxt->sax->processingInstruction != NULL))
5035 ctxt->sax->processingInstruction(ctxt->userData,
5036 target, buf);
5037 }
5038 xmlFree(buf);
5039 } else {
5040 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5041 }
5042 if (ctxt->instate != XML_PARSER_EOF)
5043 ctxt->instate = state;
5044 }
5045 }
5046
5047 /**
5048 * xmlParseNotationDecl:
5049 * @ctxt: an XML parser context
5050 *
5051 * parse a notation declaration
5052 *
5053 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5054 *
5055 * Hence there is actually 3 choices:
5056 * 'PUBLIC' S PubidLiteral
5057 * 'PUBLIC' S PubidLiteral S SystemLiteral
5058 * and 'SYSTEM' S SystemLiteral
5059 *
5060 * See the NOTE on xmlParseExternalID().
5061 */
5062
5063 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5064 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5065 const xmlChar *name;
5066 xmlChar *Pubid;
5067 xmlChar *Systemid;
5068
5069 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5070 xmlParserInputPtr input = ctxt->input;
5071 SHRINK;
5072 SKIP(10);
5073 if (!IS_BLANK_CH(CUR)) {
5074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5075 "Space required after '<!NOTATION'\n");
5076 return;
5077 }
5078 SKIP_BLANKS;
5079
5080 name = xmlParseName(ctxt);
5081 if (name == NULL) {
5082 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5083 return;
5084 }
5085 if (!IS_BLANK_CH(CUR)) {
5086 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5087 "Space required after the NOTATION name'\n");
5088 return;
5089 }
5090 if (xmlStrchr(name, ':') != NULL) {
5091 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5092 "colon are forbidden from notation names '%s'\n",
5093 name, NULL, NULL);
5094 }
5095 SKIP_BLANKS;
5096
5097 /*
5098 * Parse the IDs.
5099 */
5100 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5101 SKIP_BLANKS;
5102
5103 if (RAW == '>') {
5104 if (input != ctxt->input) {
5105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5106 "Notation declaration doesn't start and stop in the same entity\n");
5107 }
5108 NEXT;
5109 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5110 (ctxt->sax->notationDecl != NULL))
5111 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5112 } else {
5113 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5114 }
5115 if (Systemid != NULL) xmlFree(Systemid);
5116 if (Pubid != NULL) xmlFree(Pubid);
5117 }
5118 }
5119
5120 /**
5121 * xmlParseEntityDecl:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse <!ENTITY declarations
5125 *
5126 * [70] EntityDecl ::= GEDecl | PEDecl
5127 *
5128 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5129 *
5130 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5131 *
5132 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5133 *
5134 * [74] PEDef ::= EntityValue | ExternalID
5135 *
5136 * [76] NDataDecl ::= S 'NDATA' S Name
5137 *
5138 * [ VC: Notation Declared ]
5139 * The Name must match the declared name of a notation.
5140 */
5141
5142 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5143 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5144 const xmlChar *name = NULL;
5145 xmlChar *value = NULL;
5146 xmlChar *URI = NULL, *literal = NULL;
5147 const xmlChar *ndata = NULL;
5148 int isParameter = 0;
5149 xmlChar *orig = NULL;
5150 int skipped;
5151
5152 /* GROW; done in the caller */
5153 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5154 xmlParserInputPtr input = ctxt->input;
5155 SHRINK;
5156 SKIP(8);
5157 skipped = SKIP_BLANKS;
5158 if (skipped == 0) {
5159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5160 "Space required after '<!ENTITY'\n");
5161 }
5162
5163 if (RAW == '%') {
5164 NEXT;
5165 skipped = SKIP_BLANKS;
5166 if (skipped == 0) {
5167 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5168 "Space required after '%'\n");
5169 }
5170 isParameter = 1;
5171 }
5172
5173 name = xmlParseName(ctxt);
5174 if (name == NULL) {
5175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5176 "xmlParseEntityDecl: no name\n");
5177 return;
5178 }
5179 if (xmlStrchr(name, ':') != NULL) {
5180 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5181 "colon are forbidden from entities names '%s'\n",
5182 name, NULL, NULL);
5183 }
5184 skipped = SKIP_BLANKS;
5185 if (skipped == 0) {
5186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5187 "Space required after the entity name\n");
5188 }
5189
5190 ctxt->instate = XML_PARSER_ENTITY_DECL;
5191 /*
5192 * handle the various case of definitions...
5193 */
5194 if (isParameter) {
5195 if ((RAW == '"') || (RAW == '\'')) {
5196 value = xmlParseEntityValue(ctxt, &orig);
5197 if (value) {
5198 if ((ctxt->sax != NULL) &&
5199 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5200 ctxt->sax->entityDecl(ctxt->userData, name,
5201 XML_INTERNAL_PARAMETER_ENTITY,
5202 NULL, NULL, value);
5203 }
5204 } else {
5205 URI = xmlParseExternalID(ctxt, &literal, 1);
5206 if ((URI == NULL) && (literal == NULL)) {
5207 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5208 }
5209 if (URI) {
5210 xmlURIPtr uri;
5211
5212 uri = xmlParseURI((const char *) URI);
5213 if (uri == NULL) {
5214 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5215 "Invalid URI: %s\n", URI);
5216 /*
5217 * This really ought to be a well formedness error
5218 * but the XML Core WG decided otherwise c.f. issue
5219 * E26 of the XML erratas.
5220 */
5221 } else {
5222 if (uri->fragment != NULL) {
5223 /*
5224 * Okay this is foolish to block those but not
5225 * invalid URIs.
5226 */
5227 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5228 } else {
5229 if ((ctxt->sax != NULL) &&
5230 (!ctxt->disableSAX) &&
5231 (ctxt->sax->entityDecl != NULL))
5232 ctxt->sax->entityDecl(ctxt->userData, name,
5233 XML_EXTERNAL_PARAMETER_ENTITY,
5234 literal, URI, NULL);
5235 }
5236 xmlFreeURI(uri);
5237 }
5238 }
5239 }
5240 } else {
5241 if ((RAW == '"') || (RAW == '\'')) {
5242 value = xmlParseEntityValue(ctxt, &orig);
5243 if ((ctxt->sax != NULL) &&
5244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5245 ctxt->sax->entityDecl(ctxt->userData, name,
5246 XML_INTERNAL_GENERAL_ENTITY,
5247 NULL, NULL, value);
5248 /*
5249 * For expat compatibility in SAX mode.
5250 */
5251 if ((ctxt->myDoc == NULL) ||
5252 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5253 if (ctxt->myDoc == NULL) {
5254 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5255 if (ctxt->myDoc == NULL) {
5256 xmlErrMemory(ctxt, "New Doc failed");
5257 return;
5258 }
5259 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5260 }
5261 if (ctxt->myDoc->intSubset == NULL)
5262 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5263 BAD_CAST "fake", NULL, NULL);
5264
5265 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5266 NULL, NULL, value);
5267 }
5268 } else {
5269 URI = xmlParseExternalID(ctxt, &literal, 1);
5270 if ((URI == NULL) && (literal == NULL)) {
5271 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5272 }
5273 if (URI) {
5274 xmlURIPtr uri;
5275
5276 uri = xmlParseURI((const char *)URI);
5277 if (uri == NULL) {
5278 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5279 "Invalid URI: %s\n", URI);
5280 /*
5281 * This really ought to be a well formedness error
5282 * but the XML Core WG decided otherwise c.f. issue
5283 * E26 of the XML erratas.
5284 */
5285 } else {
5286 if (uri->fragment != NULL) {
5287 /*
5288 * Okay this is foolish to block those but not
5289 * invalid URIs.
5290 */
5291 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5292 }
5293 xmlFreeURI(uri);
5294 }
5295 }
5296 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space required before 'NDATA'\n");
5299 }
5300 SKIP_BLANKS;
5301 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5302 SKIP(5);
5303 if (!IS_BLANK_CH(CUR)) {
5304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5305 "Space required after 'NDATA'\n");
5306 }
5307 SKIP_BLANKS;
5308 ndata = xmlParseName(ctxt);
5309 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5310 (ctxt->sax->unparsedEntityDecl != NULL))
5311 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5312 literal, URI, ndata);
5313 } else {
5314 if ((ctxt->sax != NULL) &&
5315 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5316 ctxt->sax->entityDecl(ctxt->userData, name,
5317 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5318 literal, URI, NULL);
5319 /*
5320 * For expat compatibility in SAX mode.
5321 * assuming the entity repalcement was asked for
5322 */
5323 if ((ctxt->replaceEntities != 0) &&
5324 ((ctxt->myDoc == NULL) ||
5325 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5326 if (ctxt->myDoc == NULL) {
5327 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5328 if (ctxt->myDoc == NULL) {
5329 xmlErrMemory(ctxt, "New Doc failed");
5330 return;
5331 }
5332 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5333 }
5334
5335 if (ctxt->myDoc->intSubset == NULL)
5336 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5337 BAD_CAST "fake", NULL, NULL);
5338 xmlSAX2EntityDecl(ctxt, name,
5339 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5340 literal, URI, NULL);
5341 }
5342 }
5343 }
5344 }
5345 SKIP_BLANKS;
5346 if (RAW != '>') {
5347 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5348 "xmlParseEntityDecl: entity %s not terminated\n", name);
5349 } else {
5350 if (input != ctxt->input) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5352 "Entity declaration doesn't start and stop in the same entity\n");
5353 }
5354 NEXT;
5355 }
5356 if (orig != NULL) {
5357 /*
5358 * Ugly mechanism to save the raw entity value.
5359 */
5360 xmlEntityPtr cur = NULL;
5361
5362 if (isParameter) {
5363 if ((ctxt->sax != NULL) &&
5364 (ctxt->sax->getParameterEntity != NULL))
5365 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5366 } else {
5367 if ((ctxt->sax != NULL) &&
5368 (ctxt->sax->getEntity != NULL))
5369 cur = ctxt->sax->getEntity(ctxt->userData, name);
5370 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5371 cur = xmlSAX2GetEntity(ctxt, name);
5372 }
5373 }
5374 if (cur != NULL) {
5375 if (cur->orig != NULL)
5376 xmlFree(orig);
5377 else
5378 cur->orig = orig;
5379 } else
5380 xmlFree(orig);
5381 }
5382 if (value != NULL) xmlFree(value);
5383 if (URI != NULL) xmlFree(URI);
5384 if (literal != NULL) xmlFree(literal);
5385 }
5386 }
5387
5388 /**
5389 * xmlParseDefaultDecl:
5390 * @ctxt: an XML parser context
5391 * @value: Receive a possible fixed default value for the attribute
5392 *
5393 * Parse an attribute default declaration
5394 *
5395 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5396 *
5397 * [ VC: Required Attribute ]
5398 * if the default declaration is the keyword #REQUIRED, then the
5399 * attribute must be specified for all elements of the type in the
5400 * attribute-list declaration.
5401 *
5402 * [ VC: Attribute Default Legal ]
5403 * The declared default value must meet the lexical constraints of
5404 * the declared attribute type c.f. xmlValidateAttributeDecl()
5405 *
5406 * [ VC: Fixed Attribute Default ]
5407 * if an attribute has a default value declared with the #FIXED
5408 * keyword, instances of that attribute must match the default value.
5409 *
5410 * [ WFC: No < in Attribute Values ]
5411 * handled in xmlParseAttValue()
5412 *
5413 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5414 * or XML_ATTRIBUTE_FIXED.
5415 */
5416
5417 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5418 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5419 int val;
5420 xmlChar *ret;
5421
5422 *value = NULL;
5423 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5424 SKIP(9);
5425 return(XML_ATTRIBUTE_REQUIRED);
5426 }
5427 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5428 SKIP(8);
5429 return(XML_ATTRIBUTE_IMPLIED);
5430 }
5431 val = XML_ATTRIBUTE_NONE;
5432 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5433 SKIP(6);
5434 val = XML_ATTRIBUTE_FIXED;
5435 if (!IS_BLANK_CH(CUR)) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '#FIXED'\n");
5438 }
5439 SKIP_BLANKS;
5440 }
5441 ret = xmlParseAttValue(ctxt);
5442 ctxt->instate = XML_PARSER_DTD;
5443 if (ret == NULL) {
5444 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5445 "Attribute default value declaration error\n");
5446 } else
5447 *value = ret;
5448 return(val);
5449 }
5450
5451 /**
5452 * xmlParseNotationType:
5453 * @ctxt: an XML parser context
5454 *
5455 * parse an Notation attribute type.
5456 *
5457 * Note: the leading 'NOTATION' S part has already being parsed...
5458 *
5459 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5460 *
5461 * [ VC: Notation Attributes ]
5462 * Values of this type must match one of the notation names included
5463 * in the declaration; all notation names in the declaration must be declared.
5464 *
5465 * Returns: the notation attribute tree built while parsing
5466 */
5467
5468 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)5469 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5470 const xmlChar *name;
5471 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5472
5473 if (RAW != '(') {
5474 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5475 return(NULL);
5476 }
5477 SHRINK;
5478 do {
5479 NEXT;
5480 SKIP_BLANKS;
5481 name = xmlParseName(ctxt);
5482 if (name == NULL) {
5483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5484 "Name expected in NOTATION declaration\n");
5485 xmlFreeEnumeration(ret);
5486 return(NULL);
5487 }
5488 tmp = ret;
5489 while (tmp != NULL) {
5490 if (xmlStrEqual(name, tmp->name)) {
5491 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5492 "standalone: attribute notation value token %s duplicated\n",
5493 name, NULL);
5494 if (!xmlDictOwns(ctxt->dict, name))
5495 xmlFree((xmlChar *) name);
5496 break;
5497 }
5498 tmp = tmp->next;
5499 }
5500 if (tmp == NULL) {
5501 cur = xmlCreateEnumeration(name);
5502 if (cur == NULL) {
5503 xmlFreeEnumeration(ret);
5504 return(NULL);
5505 }
5506 if (last == NULL) ret = last = cur;
5507 else {
5508 last->next = cur;
5509 last = cur;
5510 }
5511 }
5512 SKIP_BLANKS;
5513 } while (RAW == '|');
5514 if (RAW != ')') {
5515 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5516 xmlFreeEnumeration(ret);
5517 return(NULL);
5518 }
5519 NEXT;
5520 return(ret);
5521 }
5522
5523 /**
5524 * xmlParseEnumerationType:
5525 * @ctxt: an XML parser context
5526 *
5527 * parse an Enumeration attribute type.
5528 *
5529 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5530 *
5531 * [ VC: Enumeration ]
5532 * Values of this type must match one of the Nmtoken tokens in
5533 * the declaration
5534 *
5535 * Returns: the enumeration attribute tree built while parsing
5536 */
5537
5538 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)5539 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5540 xmlChar *name;
5541 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5542
5543 if (RAW != '(') {
5544 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5545 return(NULL);
5546 }
5547 SHRINK;
5548 do {
5549 NEXT;
5550 SKIP_BLANKS;
5551 name = xmlParseNmtoken(ctxt);
5552 if (name == NULL) {
5553 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5554 return(ret);
5555 }
5556 tmp = ret;
5557 while (tmp != NULL) {
5558 if (xmlStrEqual(name, tmp->name)) {
5559 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5560 "standalone: attribute enumeration value token %s duplicated\n",
5561 name, NULL);
5562 if (!xmlDictOwns(ctxt->dict, name))
5563 xmlFree(name);
5564 break;
5565 }
5566 tmp = tmp->next;
5567 }
5568 if (tmp == NULL) {
5569 cur = xmlCreateEnumeration(name);
5570 if (!xmlDictOwns(ctxt->dict, name))
5571 xmlFree(name);
5572 if (cur == NULL) {
5573 xmlFreeEnumeration(ret);
5574 return(NULL);
5575 }
5576 if (last == NULL) ret = last = cur;
5577 else {
5578 last->next = cur;
5579 last = cur;
5580 }
5581 }
5582 SKIP_BLANKS;
5583 } while (RAW == '|');
5584 if (RAW != ')') {
5585 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5586 return(ret);
5587 }
5588 NEXT;
5589 return(ret);
5590 }
5591
5592 /**
5593 * xmlParseEnumeratedType:
5594 * @ctxt: an XML parser context
5595 * @tree: the enumeration tree built while parsing
5596 *
5597 * parse an Enumerated attribute type.
5598 *
5599 * [57] EnumeratedType ::= NotationType | Enumeration
5600 *
5601 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5602 *
5603 *
5604 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5605 */
5606
5607 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5608 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5609 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5610 SKIP(8);
5611 if (!IS_BLANK_CH(CUR)) {
5612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5613 "Space required after 'NOTATION'\n");
5614 return(0);
5615 }
5616 SKIP_BLANKS;
5617 *tree = xmlParseNotationType(ctxt);
5618 if (*tree == NULL) return(0);
5619 return(XML_ATTRIBUTE_NOTATION);
5620 }
5621 *tree = xmlParseEnumerationType(ctxt);
5622 if (*tree == NULL) return(0);
5623 return(XML_ATTRIBUTE_ENUMERATION);
5624 }
5625
5626 /**
5627 * xmlParseAttributeType:
5628 * @ctxt: an XML parser context
5629 * @tree: the enumeration tree built while parsing
5630 *
5631 * parse the Attribute list def for an element
5632 *
5633 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5634 *
5635 * [55] StringType ::= 'CDATA'
5636 *
5637 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5638 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5639 *
5640 * Validity constraints for attribute values syntax are checked in
5641 * xmlValidateAttributeValue()
5642 *
5643 * [ VC: ID ]
5644 * Values of type ID must match the Name production. A name must not
5645 * appear more than once in an XML document as a value of this type;
5646 * i.e., ID values must uniquely identify the elements which bear them.
5647 *
5648 * [ VC: One ID per Element Type ]
5649 * No element type may have more than one ID attribute specified.
5650 *
5651 * [ VC: ID Attribute Default ]
5652 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5653 *
5654 * [ VC: IDREF ]
5655 * Values of type IDREF must match the Name production, and values
5656 * of type IDREFS must match Names; each IDREF Name must match the value
5657 * of an ID attribute on some element in the XML document; i.e. IDREF
5658 * values must match the value of some ID attribute.
5659 *
5660 * [ VC: Entity Name ]
5661 * Values of type ENTITY must match the Name production, values
5662 * of type ENTITIES must match Names; each Entity Name must match the
5663 * name of an unparsed entity declared in the DTD.
5664 *
5665 * [ VC: Name Token ]
5666 * Values of type NMTOKEN must match the Nmtoken production; values
5667 * of type NMTOKENS must match Nmtokens.
5668 *
5669 * Returns the attribute type
5670 */
5671 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)5672 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5673 SHRINK;
5674 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5675 SKIP(5);
5676 return(XML_ATTRIBUTE_CDATA);
5677 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5678 SKIP(6);
5679 return(XML_ATTRIBUTE_IDREFS);
5680 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5681 SKIP(5);
5682 return(XML_ATTRIBUTE_IDREF);
5683 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5684 SKIP(2);
5685 return(XML_ATTRIBUTE_ID);
5686 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5687 SKIP(6);
5688 return(XML_ATTRIBUTE_ENTITY);
5689 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5690 SKIP(8);
5691 return(XML_ATTRIBUTE_ENTITIES);
5692 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5693 SKIP(8);
5694 return(XML_ATTRIBUTE_NMTOKENS);
5695 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5696 SKIP(7);
5697 return(XML_ATTRIBUTE_NMTOKEN);
5698 }
5699 return(xmlParseEnumeratedType(ctxt, tree));
5700 }
5701
5702 /**
5703 * xmlParseAttributeListDecl:
5704 * @ctxt: an XML parser context
5705 *
5706 * : parse the Attribute list def for an element
5707 *
5708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5709 *
5710 * [53] AttDef ::= S Name S AttType S DefaultDecl
5711 *
5712 */
5713 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)5714 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5715 const xmlChar *elemName;
5716 const xmlChar *attrName;
5717 xmlEnumerationPtr tree;
5718
5719 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5720 xmlParserInputPtr input = ctxt->input;
5721
5722 SKIP(9);
5723 if (!IS_BLANK_CH(CUR)) {
5724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5725 "Space required after '<!ATTLIST'\n");
5726 }
5727 SKIP_BLANKS;
5728 elemName = xmlParseName(ctxt);
5729 if (elemName == NULL) {
5730 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5731 "ATTLIST: no name for Element\n");
5732 return;
5733 }
5734 SKIP_BLANKS;
5735 GROW;
5736 while (RAW != '>') {
5737 const xmlChar *check = CUR_PTR;
5738 int type;
5739 int def;
5740 xmlChar *defaultValue = NULL;
5741
5742 GROW;
5743 tree = NULL;
5744 attrName = xmlParseName(ctxt);
5745 if (attrName == NULL) {
5746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "ATTLIST: no name for Attribute\n");
5748 break;
5749 }
5750 GROW;
5751 if (!IS_BLANK_CH(CUR)) {
5752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5753 "Space required after the attribute name\n");
5754 break;
5755 }
5756 SKIP_BLANKS;
5757
5758 type = xmlParseAttributeType(ctxt, &tree);
5759 if (type <= 0) {
5760 break;
5761 }
5762
5763 GROW;
5764 if (!IS_BLANK_CH(CUR)) {
5765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5766 "Space required after the attribute type\n");
5767 if (tree != NULL)
5768 xmlFreeEnumeration(tree);
5769 break;
5770 }
5771 SKIP_BLANKS;
5772
5773 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5774 if (def <= 0) {
5775 if (defaultValue != NULL)
5776 xmlFree(defaultValue);
5777 if (tree != NULL)
5778 xmlFreeEnumeration(tree);
5779 break;
5780 }
5781 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5782 xmlAttrNormalizeSpace(defaultValue, defaultValue);
5783
5784 GROW;
5785 if (RAW != '>') {
5786 if (!IS_BLANK_CH(CUR)) {
5787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5788 "Space required after the attribute default value\n");
5789 if (defaultValue != NULL)
5790 xmlFree(defaultValue);
5791 if (tree != NULL)
5792 xmlFreeEnumeration(tree);
5793 break;
5794 }
5795 SKIP_BLANKS;
5796 }
5797 if (check == CUR_PTR) {
5798 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5799 "in xmlParseAttributeListDecl\n");
5800 if (defaultValue != NULL)
5801 xmlFree(defaultValue);
5802 if (tree != NULL)
5803 xmlFreeEnumeration(tree);
5804 break;
5805 }
5806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5807 (ctxt->sax->attributeDecl != NULL))
5808 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5809 type, def, defaultValue, tree);
5810 else if (tree != NULL)
5811 xmlFreeEnumeration(tree);
5812
5813 if ((ctxt->sax2) && (defaultValue != NULL) &&
5814 (def != XML_ATTRIBUTE_IMPLIED) &&
5815 (def != XML_ATTRIBUTE_REQUIRED)) {
5816 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5817 }
5818 if (ctxt->sax2) {
5819 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5820 }
5821 if (defaultValue != NULL)
5822 xmlFree(defaultValue);
5823 GROW;
5824 }
5825 if (RAW == '>') {
5826 if (input != ctxt->input) {
5827 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5828 "Attribute list declaration doesn't start and stop in the same entity\n",
5829 NULL, NULL);
5830 }
5831 NEXT;
5832 }
5833 }
5834 }
5835
5836 /**
5837 * xmlParseElementMixedContentDecl:
5838 * @ctxt: an XML parser context
5839 * @inputchk: the input used for the current entity, needed for boundary checks
5840 *
5841 * parse the declaration for a Mixed Element content
5842 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5843 *
5844 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5845 * '(' S? '#PCDATA' S? ')'
5846 *
5847 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5848 *
5849 * [ VC: No Duplicate Types ]
5850 * The same name must not appear more than once in a single
5851 * mixed-content declaration.
5852 *
5853 * returns: the list of the xmlElementContentPtr describing the element choices
5854 */
5855 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)5856 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5857 xmlElementContentPtr ret = NULL, cur = NULL, n;
5858 const xmlChar *elem = NULL;
5859
5860 GROW;
5861 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5862 SKIP(7);
5863 SKIP_BLANKS;
5864 SHRINK;
5865 if (RAW == ')') {
5866 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5867 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5868 "Element content declaration doesn't start and stop in the same entity\n",
5869 NULL, NULL);
5870 }
5871 NEXT;
5872 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5873 if (ret == NULL)
5874 return(NULL);
5875 if (RAW == '*') {
5876 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5877 NEXT;
5878 }
5879 return(ret);
5880 }
5881 if ((RAW == '(') || (RAW == '|')) {
5882 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5883 if (ret == NULL) return(NULL);
5884 }
5885 while (RAW == '|') {
5886 NEXT;
5887 if (elem == NULL) {
5888 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5889 if (ret == NULL) return(NULL);
5890 ret->c1 = cur;
5891 if (cur != NULL)
5892 cur->parent = ret;
5893 cur = ret;
5894 } else {
5895 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5896 if (n == NULL) return(NULL);
5897 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5898 if (n->c1 != NULL)
5899 n->c1->parent = n;
5900 cur->c2 = n;
5901 if (n != NULL)
5902 n->parent = cur;
5903 cur = n;
5904 }
5905 SKIP_BLANKS;
5906 elem = xmlParseName(ctxt);
5907 if (elem == NULL) {
5908 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5909 "xmlParseElementMixedContentDecl : Name expected\n");
5910 xmlFreeDocElementContent(ctxt->myDoc, cur);
5911 return(NULL);
5912 }
5913 SKIP_BLANKS;
5914 GROW;
5915 }
5916 if ((RAW == ')') && (NXT(1) == '*')) {
5917 if (elem != NULL) {
5918 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5919 XML_ELEMENT_CONTENT_ELEMENT);
5920 if (cur->c2 != NULL)
5921 cur->c2->parent = cur;
5922 }
5923 if (ret != NULL)
5924 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5925 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5926 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5927 "Element content declaration doesn't start and stop in the same entity\n",
5928 NULL, NULL);
5929 }
5930 SKIP(2);
5931 } else {
5932 xmlFreeDocElementContent(ctxt->myDoc, ret);
5933 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5934 return(NULL);
5935 }
5936
5937 } else {
5938 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5939 }
5940 return(ret);
5941 }
5942
5943 /**
5944 * xmlParseElementChildrenContentDeclPriv:
5945 * @ctxt: an XML parser context
5946 * @inputchk: the input used for the current entity, needed for boundary checks
5947 * @depth: the level of recursion
5948 *
5949 * parse the declaration for a Mixed Element content
5950 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5951 *
5952 *
5953 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5954 *
5955 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5956 *
5957 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5958 *
5959 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5960 *
5961 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5962 * TODO Parameter-entity replacement text must be properly nested
5963 * with parenthesized groups. That is to say, if either of the
5964 * opening or closing parentheses in a choice, seq, or Mixed
5965 * construct is contained in the replacement text for a parameter
5966 * entity, both must be contained in the same replacement text. For
5967 * interoperability, if a parameter-entity reference appears in a
5968 * choice, seq, or Mixed construct, its replacement text should not
5969 * be empty, and neither the first nor last non-blank character of
5970 * the replacement text should be a connector (| or ,).
5971 *
5972 * Returns the tree of xmlElementContentPtr describing the element
5973 * hierarchy.
5974 */
5975 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)5976 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5977 int depth) {
5978 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5979 const xmlChar *elem;
5980 xmlChar type = 0;
5981
5982 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5983 (depth > 2048)) {
5984 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5985 "xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5986 depth);
5987 return(NULL);
5988 }
5989 SKIP_BLANKS;
5990 GROW;
5991 if (RAW == '(') {
5992 int inputid = ctxt->input->id;
5993
5994 /* Recurse on first child */
5995 NEXT;
5996 SKIP_BLANKS;
5997 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5998 depth + 1);
5999 SKIP_BLANKS;
6000 GROW;
6001 } else {
6002 elem = xmlParseName(ctxt);
6003 if (elem == NULL) {
6004 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6005 return(NULL);
6006 }
6007 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6008 if (cur == NULL) {
6009 xmlErrMemory(ctxt, NULL);
6010 return(NULL);
6011 }
6012 GROW;
6013 if (RAW == '?') {
6014 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6015 NEXT;
6016 } else if (RAW == '*') {
6017 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6018 NEXT;
6019 } else if (RAW == '+') {
6020 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6021 NEXT;
6022 } else {
6023 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6024 }
6025 GROW;
6026 }
6027 SKIP_BLANKS;
6028 SHRINK;
6029 while (RAW != ')') {
6030 /*
6031 * Each loop we parse one separator and one element.
6032 */
6033 if (RAW == ',') {
6034 if (type == 0) type = CUR;
6035
6036 /*
6037 * Detect "Name | Name , Name" error
6038 */
6039 else if (type != CUR) {
6040 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6041 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6042 type);
6043 if ((last != NULL) && (last != ret))
6044 xmlFreeDocElementContent(ctxt->myDoc, last);
6045 if (ret != NULL)
6046 xmlFreeDocElementContent(ctxt->myDoc, ret);
6047 return(NULL);
6048 }
6049 NEXT;
6050
6051 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6052 if (op == NULL) {
6053 if ((last != NULL) && (last != ret))
6054 xmlFreeDocElementContent(ctxt->myDoc, last);
6055 xmlFreeDocElementContent(ctxt->myDoc, ret);
6056 return(NULL);
6057 }
6058 if (last == NULL) {
6059 op->c1 = ret;
6060 if (ret != NULL)
6061 ret->parent = op;
6062 ret = cur = op;
6063 } else {
6064 cur->c2 = op;
6065 if (op != NULL)
6066 op->parent = cur;
6067 op->c1 = last;
6068 if (last != NULL)
6069 last->parent = op;
6070 cur =op;
6071 last = NULL;
6072 }
6073 } else if (RAW == '|') {
6074 if (type == 0) type = CUR;
6075
6076 /*
6077 * Detect "Name , Name | Name" error
6078 */
6079 else if (type != CUR) {
6080 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6081 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6082 type);
6083 if ((last != NULL) && (last != ret))
6084 xmlFreeDocElementContent(ctxt->myDoc, last);
6085 if (ret != NULL)
6086 xmlFreeDocElementContent(ctxt->myDoc, ret);
6087 return(NULL);
6088 }
6089 NEXT;
6090
6091 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6092 if (op == NULL) {
6093 if ((last != NULL) && (last != ret))
6094 xmlFreeDocElementContent(ctxt->myDoc, last);
6095 if (ret != NULL)
6096 xmlFreeDocElementContent(ctxt->myDoc, ret);
6097 return(NULL);
6098 }
6099 if (last == NULL) {
6100 op->c1 = ret;
6101 if (ret != NULL)
6102 ret->parent = op;
6103 ret = cur = op;
6104 } else {
6105 cur->c2 = op;
6106 if (op != NULL)
6107 op->parent = cur;
6108 op->c1 = last;
6109 if (last != NULL)
6110 last->parent = op;
6111 cur =op;
6112 last = NULL;
6113 }
6114 } else {
6115 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6116 if ((last != NULL) && (last != ret))
6117 xmlFreeDocElementContent(ctxt->myDoc, last);
6118 if (ret != NULL)
6119 xmlFreeDocElementContent(ctxt->myDoc, ret);
6120 return(NULL);
6121 }
6122 GROW;
6123 SKIP_BLANKS;
6124 GROW;
6125 if (RAW == '(') {
6126 int inputid = ctxt->input->id;
6127 /* Recurse on second child */
6128 NEXT;
6129 SKIP_BLANKS;
6130 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6131 depth + 1);
6132 SKIP_BLANKS;
6133 } else {
6134 elem = xmlParseName(ctxt);
6135 if (elem == NULL) {
6136 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6137 if (ret != NULL)
6138 xmlFreeDocElementContent(ctxt->myDoc, ret);
6139 return(NULL);
6140 }
6141 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6142 if (last == NULL) {
6143 if (ret != NULL)
6144 xmlFreeDocElementContent(ctxt->myDoc, ret);
6145 return(NULL);
6146 }
6147 if (RAW == '?') {
6148 last->ocur = XML_ELEMENT_CONTENT_OPT;
6149 NEXT;
6150 } else if (RAW == '*') {
6151 last->ocur = XML_ELEMENT_CONTENT_MULT;
6152 NEXT;
6153 } else if (RAW == '+') {
6154 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6155 NEXT;
6156 } else {
6157 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6158 }
6159 }
6160 SKIP_BLANKS;
6161 GROW;
6162 }
6163 if ((cur != NULL) && (last != NULL)) {
6164 cur->c2 = last;
6165 if (last != NULL)
6166 last->parent = cur;
6167 }
6168 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6169 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6170 "Element content declaration doesn't start and stop in the same entity\n",
6171 NULL, NULL);
6172 }
6173 NEXT;
6174 if (RAW == '?') {
6175 if (ret != NULL) {
6176 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6177 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 else
6180 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6181 }
6182 NEXT;
6183 } else if (RAW == '*') {
6184 if (ret != NULL) {
6185 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6186 cur = ret;
6187 /*
6188 * Some normalization:
6189 * (a | b* | c?)* == (a | b | c)*
6190 */
6191 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6192 if ((cur->c1 != NULL) &&
6193 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6194 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6195 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6196 if ((cur->c2 != NULL) &&
6197 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6199 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6200 cur = cur->c2;
6201 }
6202 }
6203 NEXT;
6204 } else if (RAW == '+') {
6205 if (ret != NULL) {
6206 int found = 0;
6207
6208 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6209 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6210 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6211 else
6212 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6213 /*
6214 * Some normalization:
6215 * (a | b*)+ == (a | b)*
6216 * (a | b?)+ == (a | b)*
6217 */
6218 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6219 if ((cur->c1 != NULL) &&
6220 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6221 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6222 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6223 found = 1;
6224 }
6225 if ((cur->c2 != NULL) &&
6226 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6227 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6228 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6229 found = 1;
6230 }
6231 cur = cur->c2;
6232 }
6233 if (found)
6234 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6235 }
6236 NEXT;
6237 }
6238 return(ret);
6239 }
6240
6241 /**
6242 * xmlParseElementChildrenContentDecl:
6243 * @ctxt: an XML parser context
6244 * @inputchk: the input used for the current entity, needed for boundary checks
6245 *
6246 * parse the declaration for a Mixed Element content
6247 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248 *
6249 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6250 *
6251 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6252 *
6253 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6254 *
6255 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6256 *
6257 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6258 * TODO Parameter-entity replacement text must be properly nested
6259 * with parenthesized groups. That is to say, if either of the
6260 * opening or closing parentheses in a choice, seq, or Mixed
6261 * construct is contained in the replacement text for a parameter
6262 * entity, both must be contained in the same replacement text. For
6263 * interoperability, if a parameter-entity reference appears in a
6264 * choice, seq, or Mixed construct, its replacement text should not
6265 * be empty, and neither the first nor last non-blank character of
6266 * the replacement text should be a connector (| or ,).
6267 *
6268 * Returns the tree of xmlElementContentPtr describing the element
6269 * hierarchy.
6270 */
6271 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6272 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6273 /* stub left for API/ABI compat */
6274 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6275 }
6276
6277 /**
6278 * xmlParseElementContentDecl:
6279 * @ctxt: an XML parser context
6280 * @name: the name of the element being defined.
6281 * @result: the Element Content pointer will be stored here if any
6282 *
6283 * parse the declaration for an Element content either Mixed or Children,
6284 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6285 *
6286 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6287 *
6288 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6289 */
6290
6291 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6292 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6293 xmlElementContentPtr *result) {
6294
6295 xmlElementContentPtr tree = NULL;
6296 int inputid = ctxt->input->id;
6297 int res;
6298
6299 *result = NULL;
6300
6301 if (RAW != '(') {
6302 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6303 "xmlParseElementContentDecl : %s '(' expected\n", name);
6304 return(-1);
6305 }
6306 NEXT;
6307 GROW;
6308 SKIP_BLANKS;
6309 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6310 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6311 res = XML_ELEMENT_TYPE_MIXED;
6312 } else {
6313 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6314 res = XML_ELEMENT_TYPE_ELEMENT;
6315 }
6316 SKIP_BLANKS;
6317 *result = tree;
6318 return(res);
6319 }
6320
6321 /**
6322 * xmlParseElementDecl:
6323 * @ctxt: an XML parser context
6324 *
6325 * parse an Element declaration.
6326 *
6327 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6328 *
6329 * [ VC: Unique Element Type Declaration ]
6330 * No element type may be declared more than once
6331 *
6332 * Returns the type of the element, or -1 in case of error
6333 */
6334 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6335 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6336 const xmlChar *name;
6337 int ret = -1;
6338 xmlElementContentPtr content = NULL;
6339
6340 /* GROW; done in the caller */
6341 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6342 xmlParserInputPtr input = ctxt->input;
6343
6344 SKIP(9);
6345 if (!IS_BLANK_CH(CUR)) {
6346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6347 "Space required after 'ELEMENT'\n");
6348 }
6349 SKIP_BLANKS;
6350 name = xmlParseName(ctxt);
6351 if (name == NULL) {
6352 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6353 "xmlParseElementDecl: no name for Element\n");
6354 return(-1);
6355 }
6356 while ((RAW == 0) && (ctxt->inputNr > 1))
6357 xmlPopInput(ctxt);
6358 if (!IS_BLANK_CH(CUR)) {
6359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360 "Space required after the element name\n");
6361 }
6362 SKIP_BLANKS;
6363 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6364 SKIP(5);
6365 /*
6366 * Element must always be empty.
6367 */
6368 ret = XML_ELEMENT_TYPE_EMPTY;
6369 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6370 (NXT(2) == 'Y')) {
6371 SKIP(3);
6372 /*
6373 * Element is a generic container.
6374 */
6375 ret = XML_ELEMENT_TYPE_ANY;
6376 } else if (RAW == '(') {
6377 ret = xmlParseElementContentDecl(ctxt, name, &content);
6378 } else {
6379 /*
6380 * [ WFC: PEs in Internal Subset ] error handling.
6381 */
6382 if ((RAW == '%') && (ctxt->external == 0) &&
6383 (ctxt->inputNr == 1)) {
6384 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6385 "PEReference: forbidden within markup decl in internal subset\n");
6386 } else {
6387 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6388 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6389 }
6390 return(-1);
6391 }
6392
6393 SKIP_BLANKS;
6394 /*
6395 * Pop-up of finished entities.
6396 */
6397 while ((RAW == 0) && (ctxt->inputNr > 1))
6398 xmlPopInput(ctxt);
6399 SKIP_BLANKS;
6400
6401 if (RAW != '>') {
6402 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6403 if (content != NULL) {
6404 xmlFreeDocElementContent(ctxt->myDoc, content);
6405 }
6406 } else {
6407 if (input != ctxt->input) {
6408 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6409 "Element declaration doesn't start and stop in the same entity\n");
6410 }
6411
6412 NEXT;
6413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6414 (ctxt->sax->elementDecl != NULL)) {
6415 if (content != NULL)
6416 content->parent = NULL;
6417 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6418 content);
6419 if ((content != NULL) && (content->parent == NULL)) {
6420 /*
6421 * this is a trick: if xmlAddElementDecl is called,
6422 * instead of copying the full tree it is plugged directly
6423 * if called from the parser. Avoid duplicating the
6424 * interfaces or change the API/ABI
6425 */
6426 xmlFreeDocElementContent(ctxt->myDoc, content);
6427 }
6428 } else if (content != NULL) {
6429 xmlFreeDocElementContent(ctxt->myDoc, content);
6430 }
6431 }
6432 }
6433 return(ret);
6434 }
6435
6436 /**
6437 * xmlParseConditionalSections
6438 * @ctxt: an XML parser context
6439 *
6440 * [61] conditionalSect ::= includeSect | ignoreSect
6441 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6442 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6443 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6444 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6445 */
6446
6447 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6448 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6449 int id = ctxt->input->id;
6450
6451 SKIP(3);
6452 SKIP_BLANKS;
6453 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6454 SKIP(7);
6455 SKIP_BLANKS;
6456 if (RAW != '[') {
6457 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6458 } else {
6459 if (ctxt->input->id != id) {
6460 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6461 "All markup of the conditional section is not in the same entity\n",
6462 NULL, NULL);
6463 }
6464 NEXT;
6465 }
6466 if (xmlParserDebugEntities) {
6467 if ((ctxt->input != NULL) && (ctxt->input->filename))
6468 xmlGenericError(xmlGenericErrorContext,
6469 "%s(%d): ", ctxt->input->filename,
6470 ctxt->input->line);
6471 xmlGenericError(xmlGenericErrorContext,
6472 "Entering INCLUDE Conditional Section\n");
6473 }
6474
6475 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6476 (NXT(2) != '>'))) {
6477 const xmlChar *check = CUR_PTR;
6478 unsigned int cons = ctxt->input->consumed;
6479
6480 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6481 xmlParseConditionalSections(ctxt);
6482 } else if (IS_BLANK_CH(CUR)) {
6483 NEXT;
6484 } else if (RAW == '%') {
6485 xmlParsePEReference(ctxt);
6486 } else
6487 xmlParseMarkupDecl(ctxt);
6488
6489 /*
6490 * Pop-up of finished entities.
6491 */
6492 while ((RAW == 0) && (ctxt->inputNr > 1))
6493 xmlPopInput(ctxt);
6494
6495 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6496 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6497 break;
6498 }
6499 }
6500 if (xmlParserDebugEntities) {
6501 if ((ctxt->input != NULL) && (ctxt->input->filename))
6502 xmlGenericError(xmlGenericErrorContext,
6503 "%s(%d): ", ctxt->input->filename,
6504 ctxt->input->line);
6505 xmlGenericError(xmlGenericErrorContext,
6506 "Leaving INCLUDE Conditional Section\n");
6507 }
6508
6509 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6510 int state;
6511 xmlParserInputState instate;
6512 int depth = 0;
6513
6514 SKIP(6);
6515 SKIP_BLANKS;
6516 if (RAW != '[') {
6517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6518 } else {
6519 if (ctxt->input->id != id) {
6520 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6521 "All markup of the conditional section is not in the same entity\n",
6522 NULL, NULL);
6523 }
6524 NEXT;
6525 }
6526 if (xmlParserDebugEntities) {
6527 if ((ctxt->input != NULL) && (ctxt->input->filename))
6528 xmlGenericError(xmlGenericErrorContext,
6529 "%s(%d): ", ctxt->input->filename,
6530 ctxt->input->line);
6531 xmlGenericError(xmlGenericErrorContext,
6532 "Entering IGNORE Conditional Section\n");
6533 }
6534
6535 /*
6536 * Parse up to the end of the conditional section
6537 * But disable SAX event generating DTD building in the meantime
6538 */
6539 state = ctxt->disableSAX;
6540 instate = ctxt->instate;
6541 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6542 ctxt->instate = XML_PARSER_IGNORE;
6543
6544 while ((depth >= 0) && (RAW != 0)) {
6545 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6546 depth++;
6547 SKIP(3);
6548 continue;
6549 }
6550 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6551 if (--depth >= 0) SKIP(3);
6552 continue;
6553 }
6554 NEXT;
6555 continue;
6556 }
6557
6558 ctxt->disableSAX = state;
6559 ctxt->instate = instate;
6560
6561 if (xmlParserDebugEntities) {
6562 if ((ctxt->input != NULL) && (ctxt->input->filename))
6563 xmlGenericError(xmlGenericErrorContext,
6564 "%s(%d): ", ctxt->input->filename,
6565 ctxt->input->line);
6566 xmlGenericError(xmlGenericErrorContext,
6567 "Leaving IGNORE Conditional Section\n");
6568 }
6569
6570 } else {
6571 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6572 }
6573
6574 if (RAW == 0)
6575 SHRINK;
6576
6577 if (RAW == 0) {
6578 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6579 } else {
6580 if (ctxt->input->id != id) {
6581 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6582 "All markup of the conditional section is not in the same entity\n",
6583 NULL, NULL);
6584 }
6585 SKIP(3);
6586 }
6587 }
6588
6589 /**
6590 * xmlParseMarkupDecl:
6591 * @ctxt: an XML parser context
6592 *
6593 * parse Markup declarations
6594 *
6595 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6596 * NotationDecl | PI | Comment
6597 *
6598 * [ VC: Proper Declaration/PE Nesting ]
6599 * Parameter-entity replacement text must be properly nested with
6600 * markup declarations. That is to say, if either the first character
6601 * or the last character of a markup declaration (markupdecl above) is
6602 * contained in the replacement text for a parameter-entity reference,
6603 * both must be contained in the same replacement text.
6604 *
6605 * [ WFC: PEs in Internal Subset ]
6606 * In the internal DTD subset, parameter-entity references can occur
6607 * only where markup declarations can occur, not within markup declarations.
6608 * (This does not apply to references that occur in external parameter
6609 * entities or to the external subset.)
6610 */
6611 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)6612 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6613 GROW;
6614 if (CUR == '<') {
6615 if (NXT(1) == '!') {
6616 switch (NXT(2)) {
6617 case 'E':
6618 if (NXT(3) == 'L')
6619 xmlParseElementDecl(ctxt);
6620 else if (NXT(3) == 'N')
6621 xmlParseEntityDecl(ctxt);
6622 break;
6623 case 'A':
6624 xmlParseAttributeListDecl(ctxt);
6625 break;
6626 case 'N':
6627 xmlParseNotationDecl(ctxt);
6628 break;
6629 case '-':
6630 xmlParseComment(ctxt);
6631 break;
6632 default:
6633 /* there is an error but it will be detected later */
6634 break;
6635 }
6636 } else if (NXT(1) == '?') {
6637 xmlParsePI(ctxt);
6638 }
6639 }
6640 /*
6641 * This is only for internal subset. On external entities,
6642 * the replacement is done before parsing stage
6643 */
6644 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6645 xmlParsePEReference(ctxt);
6646
6647 /*
6648 * Conditional sections are allowed from entities included
6649 * by PE References in the internal subset.
6650 */
6651 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6652 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6653 xmlParseConditionalSections(ctxt);
6654 }
6655 }
6656
6657 ctxt->instate = XML_PARSER_DTD;
6658 }
6659
6660 /**
6661 * xmlParseTextDecl:
6662 * @ctxt: an XML parser context
6663 *
6664 * parse an XML declaration header for external entities
6665 *
6666 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6667 */
6668
6669 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)6670 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6671 xmlChar *version;
6672 const xmlChar *encoding;
6673
6674 /*
6675 * We know that '<?xml' is here.
6676 */
6677 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6678 SKIP(5);
6679 } else {
6680 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6681 return;
6682 }
6683
6684 if (!IS_BLANK_CH(CUR)) {
6685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6686 "Space needed after '<?xml'\n");
6687 }
6688 SKIP_BLANKS;
6689
6690 /*
6691 * We may have the VersionInfo here.
6692 */
6693 version = xmlParseVersionInfo(ctxt);
6694 if (version == NULL)
6695 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6696 else {
6697 if (!IS_BLANK_CH(CUR)) {
6698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 "Space needed here\n");
6700 }
6701 }
6702 ctxt->input->version = version;
6703
6704 /*
6705 * We must have the encoding declaration
6706 */
6707 encoding = xmlParseEncodingDecl(ctxt);
6708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6709 /*
6710 * The XML REC instructs us to stop parsing right here
6711 */
6712 return;
6713 }
6714 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6715 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6716 "Missing encoding in text declaration\n");
6717 }
6718
6719 SKIP_BLANKS;
6720 if ((RAW == '?') && (NXT(1) == '>')) {
6721 SKIP(2);
6722 } else if (RAW == '>') {
6723 /* Deprecated old WD ... */
6724 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6725 NEXT;
6726 } else {
6727 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6728 MOVETO_ENDTAG(CUR_PTR);
6729 NEXT;
6730 }
6731 }
6732
6733 /**
6734 * xmlParseExternalSubset:
6735 * @ctxt: an XML parser context
6736 * @ExternalID: the external identifier
6737 * @SystemID: the system identifier (or URL)
6738 *
6739 * parse Markup declarations from an external subset
6740 *
6741 * [30] extSubset ::= textDecl? extSubsetDecl
6742 *
6743 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6744 */
6745 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)6746 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6747 const xmlChar *SystemID) {
6748 xmlDetectSAX2(ctxt);
6749 GROW;
6750
6751 if ((ctxt->encoding == NULL) &&
6752 (ctxt->input->end - ctxt->input->cur >= 4)) {
6753 xmlChar start[4];
6754 xmlCharEncoding enc;
6755
6756 start[0] = RAW;
6757 start[1] = NXT(1);
6758 start[2] = NXT(2);
6759 start[3] = NXT(3);
6760 enc = xmlDetectCharEncoding(start, 4);
6761 if (enc != XML_CHAR_ENCODING_NONE)
6762 xmlSwitchEncoding(ctxt, enc);
6763 }
6764
6765 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6766 xmlParseTextDecl(ctxt);
6767 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6768 /*
6769 * The XML REC instructs us to stop parsing right here
6770 */
6771 ctxt->instate = XML_PARSER_EOF;
6772 return;
6773 }
6774 }
6775 if (ctxt->myDoc == NULL) {
6776 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6777 if (ctxt->myDoc == NULL) {
6778 xmlErrMemory(ctxt, "New Doc failed");
6779 return;
6780 }
6781 ctxt->myDoc->properties = XML_DOC_INTERNAL;
6782 }
6783 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6784 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6785
6786 ctxt->instate = XML_PARSER_DTD;
6787 ctxt->external = 1;
6788 while (((RAW == '<') && (NXT(1) == '?')) ||
6789 ((RAW == '<') && (NXT(1) == '!')) ||
6790 (RAW == '%') || IS_BLANK_CH(CUR)) {
6791 const xmlChar *check = CUR_PTR;
6792 unsigned int cons = ctxt->input->consumed;
6793
6794 GROW;
6795 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6796 xmlParseConditionalSections(ctxt);
6797 } else if (IS_BLANK_CH(CUR)) {
6798 NEXT;
6799 } else if (RAW == '%') {
6800 xmlParsePEReference(ctxt);
6801 } else
6802 xmlParseMarkupDecl(ctxt);
6803
6804 /*
6805 * Pop-up of finished entities.
6806 */
6807 while ((RAW == 0) && (ctxt->inputNr > 1))
6808 xmlPopInput(ctxt);
6809
6810 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6811 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6812 break;
6813 }
6814 }
6815
6816 if (RAW != 0) {
6817 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6818 }
6819
6820 }
6821
6822 /**
6823 * xmlParseReference:
6824 * @ctxt: an XML parser context
6825 *
6826 * parse and handle entity references in content, depending on the SAX
6827 * interface, this may end-up in a call to character() if this is a
6828 * CharRef, a predefined entity, if there is no reference() callback.
6829 * or if the parser was asked to switch to that mode.
6830 *
6831 * [67] Reference ::= EntityRef | CharRef
6832 */
6833 void
xmlParseReference(xmlParserCtxtPtr ctxt)6834 xmlParseReference(xmlParserCtxtPtr ctxt) {
6835 xmlEntityPtr ent;
6836 xmlChar *val;
6837 int was_checked;
6838 xmlNodePtr list = NULL;
6839 xmlParserErrors ret = XML_ERR_OK;
6840
6841
6842 if (RAW != '&')
6843 return;
6844
6845 /*
6846 * Simple case of a CharRef
6847 */
6848 if (NXT(1) == '#') {
6849 int i = 0;
6850 xmlChar out[10];
6851 int hex = NXT(2);
6852 int value = xmlParseCharRef(ctxt);
6853
6854 if (value == 0)
6855 return;
6856 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6857 /*
6858 * So we are using non-UTF-8 buffers
6859 * Check that the char fit on 8bits, if not
6860 * generate a CharRef.
6861 */
6862 if (value <= 0xFF) {
6863 out[0] = value;
6864 out[1] = 0;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6866 (!ctxt->disableSAX))
6867 ctxt->sax->characters(ctxt->userData, out, 1);
6868 } else {
6869 if ((hex == 'x') || (hex == 'X'))
6870 snprintf((char *)out, sizeof(out), "#x%X", value);
6871 else
6872 snprintf((char *)out, sizeof(out), "#%d", value);
6873 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6874 (!ctxt->disableSAX))
6875 ctxt->sax->reference(ctxt->userData, out);
6876 }
6877 } else {
6878 /*
6879 * Just encode the value in UTF-8
6880 */
6881 COPY_BUF(0 ,out, i, value);
6882 out[i] = 0;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6884 (!ctxt->disableSAX))
6885 ctxt->sax->characters(ctxt->userData, out, i);
6886 }
6887 return;
6888 }
6889
6890 /*
6891 * We are seeing an entity reference
6892 */
6893 ent = xmlParseEntityRef(ctxt);
6894 if (ent == NULL) return;
6895 if (!ctxt->wellFormed)
6896 return;
6897 was_checked = ent->checked;
6898
6899 /* special case of predefined entities */
6900 if ((ent->name == NULL) ||
6901 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6902 val = ent->content;
6903 if (val == NULL) return;
6904 /*
6905 * inline the entity.
6906 */
6907 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6908 (!ctxt->disableSAX))
6909 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6910 return;
6911 }
6912
6913 /*
6914 * The first reference to the entity trigger a parsing phase
6915 * where the ent->children is filled with the result from
6916 * the parsing.
6917 */
6918 if (ent->checked == 0) {
6919 unsigned long oldnbent = ctxt->nbentities;
6920
6921 /*
6922 * This is a bit hackish but this seems the best
6923 * way to make sure both SAX and DOM entity support
6924 * behaves okay.
6925 */
6926 void *user_data;
6927 if (ctxt->userData == ctxt)
6928 user_data = NULL;
6929 else
6930 user_data = ctxt->userData;
6931
6932 /*
6933 * Check that this entity is well formed
6934 * 4.3.2: An internal general parsed entity is well-formed
6935 * if its replacement text matches the production labeled
6936 * content.
6937 */
6938 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6939 ctxt->depth++;
6940 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6941 user_data, &list);
6942 ctxt->depth--;
6943
6944 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6945 ctxt->depth++;
6946 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6947 user_data, ctxt->depth, ent->URI,
6948 ent->ExternalID, &list);
6949 ctxt->depth--;
6950 } else {
6951 ret = XML_ERR_ENTITY_PE_INTERNAL;
6952 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6953 "invalid entity type found\n", NULL);
6954 }
6955
6956 /*
6957 * Store the number of entities needing parsing for this entity
6958 * content and do checkings
6959 */
6960 ent->checked = ctxt->nbentities - oldnbent;
6961 if (ret == XML_ERR_ENTITY_LOOP) {
6962 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6963 xmlFreeNodeList(list);
6964 return;
6965 }
6966 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6967 xmlFreeNodeList(list);
6968 return;
6969 }
6970
6971 if ((ret == XML_ERR_OK) && (list != NULL)) {
6972 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6973 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6974 (ent->children == NULL)) {
6975 ent->children = list;
6976 if (ctxt->replaceEntities) {
6977 /*
6978 * Prune it directly in the generated document
6979 * except for single text nodes.
6980 */
6981 if (((list->type == XML_TEXT_NODE) &&
6982 (list->next == NULL)) ||
6983 (ctxt->parseMode == XML_PARSE_READER)) {
6984 list->parent = (xmlNodePtr) ent;
6985 list = NULL;
6986 ent->owner = 1;
6987 } else {
6988 ent->owner = 0;
6989 while (list != NULL) {
6990 list->parent = (xmlNodePtr) ctxt->node;
6991 list->doc = ctxt->myDoc;
6992 if (list->next == NULL)
6993 ent->last = list;
6994 list = list->next;
6995 }
6996 list = ent->children;
6997 #ifdef LIBXML_LEGACY_ENABLED
6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6999 xmlAddEntityReference(ent, list, NULL);
7000 #endif /* LIBXML_LEGACY_ENABLED */
7001 }
7002 } else {
7003 ent->owner = 1;
7004 while (list != NULL) {
7005 list->parent = (xmlNodePtr) ent;
7006 xmlSetTreeDoc(list, ent->doc);
7007 if (list->next == NULL)
7008 ent->last = list;
7009 list = list->next;
7010 }
7011 }
7012 } else {
7013 xmlFreeNodeList(list);
7014 list = NULL;
7015 }
7016 } else if ((ret != XML_ERR_OK) &&
7017 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7018 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7019 "Entity '%s' failed to parse\n", ent->name);
7020 } else if (list != NULL) {
7021 xmlFreeNodeList(list);
7022 list = NULL;
7023 }
7024 if (ent->checked == 0)
7025 ent->checked = 1;
7026 } else if (ent->checked != 1) {
7027 ctxt->nbentities += ent->checked;
7028 }
7029
7030 /*
7031 * Now that the entity content has been gathered
7032 * provide it to the application, this can take different forms based
7033 * on the parsing modes.
7034 */
7035 if (ent->children == NULL) {
7036 /*
7037 * Probably running in SAX mode and the callbacks don't
7038 * build the entity content. So unless we already went
7039 * though parsing for first checking go though the entity
7040 * content to generate callbacks associated to the entity
7041 */
7042 if (was_checked != 0) {
7043 void *user_data;
7044 /*
7045 * This is a bit hackish but this seems the best
7046 * way to make sure both SAX and DOM entity support
7047 * behaves okay.
7048 */
7049 if (ctxt->userData == ctxt)
7050 user_data = NULL;
7051 else
7052 user_data = ctxt->userData;
7053
7054 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7055 ctxt->depth++;
7056 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7057 ent->content, user_data, NULL);
7058 ctxt->depth--;
7059 } else if (ent->etype ==
7060 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7061 ctxt->depth++;
7062 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7063 ctxt->sax, user_data, ctxt->depth,
7064 ent->URI, ent->ExternalID, NULL);
7065 ctxt->depth--;
7066 } else {
7067 ret = XML_ERR_ENTITY_PE_INTERNAL;
7068 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7069 "invalid entity type found\n", NULL);
7070 }
7071 if (ret == XML_ERR_ENTITY_LOOP) {
7072 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7073 return;
7074 }
7075 }
7076 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7077 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7078 /*
7079 * Entity reference callback comes second, it's somewhat
7080 * superfluous but a compatibility to historical behaviour
7081 */
7082 ctxt->sax->reference(ctxt->userData, ent->name);
7083 }
7084 return;
7085 }
7086
7087 /*
7088 * If we didn't get any children for the entity being built
7089 */
7090 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7091 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7092 /*
7093 * Create a node.
7094 */
7095 ctxt->sax->reference(ctxt->userData, ent->name);
7096 return;
7097 }
7098
7099 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7100 /*
7101 * There is a problem on the handling of _private for entities
7102 * (bug 155816): Should we copy the content of the field from
7103 * the entity (possibly overwriting some value set by the user
7104 * when a copy is created), should we leave it alone, or should
7105 * we try to take care of different situations? The problem
7106 * is exacerbated by the usage of this field by the xmlReader.
7107 * To fix this bug, we look at _private on the created node
7108 * and, if it's NULL, we copy in whatever was in the entity.
7109 * If it's not NULL we leave it alone. This is somewhat of a
7110 * hack - maybe we should have further tests to determine
7111 * what to do.
7112 */
7113 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7114 /*
7115 * Seems we are generating the DOM content, do
7116 * a simple tree copy for all references except the first
7117 * In the first occurrence list contains the replacement.
7118 * progressive == 2 means we are operating on the Reader
7119 * and since nodes are discarded we must copy all the time.
7120 */
7121 if (((list == NULL) && (ent->owner == 0)) ||
7122 (ctxt->parseMode == XML_PARSE_READER)) {
7123 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7124
7125 /*
7126 * when operating on a reader, the entities definitions
7127 * are always owning the entities subtree.
7128 if (ctxt->parseMode == XML_PARSE_READER)
7129 ent->owner = 1;
7130 */
7131
7132 cur = ent->children;
7133 while (cur != NULL) {
7134 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7135 if (nw != NULL) {
7136 if (nw->_private == NULL)
7137 nw->_private = cur->_private;
7138 if (firstChild == NULL){
7139 firstChild = nw;
7140 }
7141 nw = xmlAddChild(ctxt->node, nw);
7142 }
7143 if (cur == ent->last) {
7144 /*
7145 * needed to detect some strange empty
7146 * node cases in the reader tests
7147 */
7148 if ((ctxt->parseMode == XML_PARSE_READER) &&
7149 (nw != NULL) &&
7150 (nw->type == XML_ELEMENT_NODE) &&
7151 (nw->children == NULL))
7152 nw->extra = 1;
7153
7154 break;
7155 }
7156 cur = cur->next;
7157 }
7158 #ifdef LIBXML_LEGACY_ENABLED
7159 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7160 xmlAddEntityReference(ent, firstChild, nw);
7161 #endif /* LIBXML_LEGACY_ENABLED */
7162 } else if (list == NULL) {
7163 xmlNodePtr nw = NULL, cur, next, last,
7164 firstChild = NULL;
7165 /*
7166 * Copy the entity child list and make it the new
7167 * entity child list. The goal is to make sure any
7168 * ID or REF referenced will be the one from the
7169 * document content and not the entity copy.
7170 */
7171 cur = ent->children;
7172 ent->children = NULL;
7173 last = ent->last;
7174 ent->last = NULL;
7175 while (cur != NULL) {
7176 next = cur->next;
7177 cur->next = NULL;
7178 cur->parent = NULL;
7179 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7180 if (nw != NULL) {
7181 if (nw->_private == NULL)
7182 nw->_private = cur->_private;
7183 if (firstChild == NULL){
7184 firstChild = cur;
7185 }
7186 xmlAddChild((xmlNodePtr) ent, nw);
7187 xmlAddChild(ctxt->node, cur);
7188 }
7189 if (cur == last)
7190 break;
7191 cur = next;
7192 }
7193 if (ent->owner == 0)
7194 ent->owner = 1;
7195 #ifdef LIBXML_LEGACY_ENABLED
7196 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197 xmlAddEntityReference(ent, firstChild, nw);
7198 #endif /* LIBXML_LEGACY_ENABLED */
7199 } else {
7200 const xmlChar *nbktext;
7201
7202 /*
7203 * the name change is to avoid coalescing of the
7204 * node with a possible previous text one which
7205 * would make ent->children a dangling pointer
7206 */
7207 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7208 -1);
7209 if (ent->children->type == XML_TEXT_NODE)
7210 ent->children->name = nbktext;
7211 if ((ent->last != ent->children) &&
7212 (ent->last->type == XML_TEXT_NODE))
7213 ent->last->name = nbktext;
7214 xmlAddChildList(ctxt->node, ent->children);
7215 }
7216
7217 /*
7218 * This is to avoid a nasty side effect, see
7219 * characters() in SAX.c
7220 */
7221 ctxt->nodemem = 0;
7222 ctxt->nodelen = 0;
7223 return;
7224 }
7225 }
7226 }
7227
7228 /**
7229 * xmlParseEntityRef:
7230 * @ctxt: an XML parser context
7231 *
7232 * parse ENTITY references declarations
7233 *
7234 * [68] EntityRef ::= '&' Name ';'
7235 *
7236 * [ WFC: Entity Declared ]
7237 * In a document without any DTD, a document with only an internal DTD
7238 * subset which contains no parameter entity references, or a document
7239 * with "standalone='yes'", the Name given in the entity reference
7240 * must match that in an entity declaration, except that well-formed
7241 * documents need not declare any of the following entities: amp, lt,
7242 * gt, apos, quot. The declaration of a parameter entity must precede
7243 * any reference to it. Similarly, the declaration of a general entity
7244 * must precede any reference to it which appears in a default value in an
7245 * attribute-list declaration. Note that if entities are declared in the
7246 * external subset or in external parameter entities, a non-validating
7247 * processor is not obligated to read and process their declarations;
7248 * for such documents, the rule that an entity must be declared is a
7249 * well-formedness constraint only if standalone='yes'.
7250 *
7251 * [ WFC: Parsed Entity ]
7252 * An entity reference must not contain the name of an unparsed entity
7253 *
7254 * Returns the xmlEntityPtr if found, or NULL otherwise.
7255 */
7256 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7257 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7258 const xmlChar *name;
7259 xmlEntityPtr ent = NULL;
7260
7261 GROW;
7262
7263 if (RAW != '&')
7264 return(NULL);
7265 NEXT;
7266 name = xmlParseName(ctxt);
7267 if (name == NULL) {
7268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7269 "xmlParseEntityRef: no name\n");
7270 return(NULL);
7271 }
7272 if (RAW != ';') {
7273 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7274 return(NULL);
7275 }
7276 NEXT;
7277
7278 /*
7279 * Predefined entites override any extra definition
7280 */
7281 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7282 ent = xmlGetPredefinedEntity(name);
7283 if (ent != NULL)
7284 return(ent);
7285 }
7286
7287 /*
7288 * Increate the number of entity references parsed
7289 */
7290 ctxt->nbentities++;
7291
7292 /*
7293 * Ask first SAX for entity resolution, otherwise try the
7294 * entities which may have stored in the parser context.
7295 */
7296 if (ctxt->sax != NULL) {
7297 if (ctxt->sax->getEntity != NULL)
7298 ent = ctxt->sax->getEntity(ctxt->userData, name);
7299 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7300 (ctxt->options & XML_PARSE_OLDSAX))
7301 ent = xmlGetPredefinedEntity(name);
7302 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7303 (ctxt->userData==ctxt)) {
7304 ent = xmlSAX2GetEntity(ctxt, name);
7305 }
7306 }
7307 /*
7308 * [ WFC: Entity Declared ]
7309 * In a document without any DTD, a document with only an
7310 * internal DTD subset which contains no parameter entity
7311 * references, or a document with "standalone='yes'", the
7312 * Name given in the entity reference must match that in an
7313 * entity declaration, except that well-formed documents
7314 * need not declare any of the following entities: amp, lt,
7315 * gt, apos, quot.
7316 * The declaration of a parameter entity must precede any
7317 * reference to it.
7318 * Similarly, the declaration of a general entity must
7319 * precede any reference to it which appears in a default
7320 * value in an attribute-list declaration. Note that if
7321 * entities are declared in the external subset or in
7322 * external parameter entities, a non-validating processor
7323 * is not obligated to read and process their declarations;
7324 * for such documents, the rule that an entity must be
7325 * declared is a well-formedness constraint only if
7326 * standalone='yes'.
7327 */
7328 if (ent == NULL) {
7329 if ((ctxt->standalone == 1) ||
7330 ((ctxt->hasExternalSubset == 0) &&
7331 (ctxt->hasPErefs == 0))) {
7332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7333 "Entity '%s' not defined\n", name);
7334 } else {
7335 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7336 "Entity '%s' not defined\n", name);
7337 if ((ctxt->inSubset == 0) &&
7338 (ctxt->sax != NULL) &&
7339 (ctxt->sax->reference != NULL)) {
7340 ctxt->sax->reference(ctxt->userData, name);
7341 }
7342 }
7343 ctxt->valid = 0;
7344 }
7345
7346 /*
7347 * [ WFC: Parsed Entity ]
7348 * An entity reference must not contain the name of an
7349 * unparsed entity
7350 */
7351 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7353 "Entity reference to unparsed entity %s\n", name);
7354 }
7355
7356 /*
7357 * [ WFC: No External Entity References ]
7358 * Attribute values cannot contain direct or indirect
7359 * entity references to external entities.
7360 */
7361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7362 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7364 "Attribute references external entity '%s'\n", name);
7365 }
7366 /*
7367 * [ WFC: No < in Attribute Values ]
7368 * The replacement text of any entity referred to directly or
7369 * indirectly in an attribute value (other than "<") must
7370 * not contain a <.
7371 */
7372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7373 (ent != NULL) && (ent->content != NULL) &&
7374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7375 (xmlStrchr(ent->content, '<'))) {
7376 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7377 "'<' in entity '%s' is not allowed in attributes values\n", name);
7378 }
7379
7380 /*
7381 * Internal check, no parameter entities here ...
7382 */
7383 else {
7384 switch (ent->etype) {
7385 case XML_INTERNAL_PARAMETER_ENTITY:
7386 case XML_EXTERNAL_PARAMETER_ENTITY:
7387 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7388 "Attempt to reference the parameter entity '%s'\n",
7389 name);
7390 break;
7391 default:
7392 break;
7393 }
7394 }
7395
7396 /*
7397 * [ WFC: No Recursion ]
7398 * A parsed entity must not contain a recursive reference
7399 * to itself, either directly or indirectly.
7400 * Done somewhere else
7401 */
7402 return(ent);
7403 }
7404
7405 /**
7406 * xmlParseStringEntityRef:
7407 * @ctxt: an XML parser context
7408 * @str: a pointer to an index in the string
7409 *
7410 * parse ENTITY references declarations, but this version parses it from
7411 * a string value.
7412 *
7413 * [68] EntityRef ::= '&' Name ';'
7414 *
7415 * [ WFC: Entity Declared ]
7416 * In a document without any DTD, a document with only an internal DTD
7417 * subset which contains no parameter entity references, or a document
7418 * with "standalone='yes'", the Name given in the entity reference
7419 * must match that in an entity declaration, except that well-formed
7420 * documents need not declare any of the following entities: amp, lt,
7421 * gt, apos, quot. The declaration of a parameter entity must precede
7422 * any reference to it. Similarly, the declaration of a general entity
7423 * must precede any reference to it which appears in a default value in an
7424 * attribute-list declaration. Note that if entities are declared in the
7425 * external subset or in external parameter entities, a non-validating
7426 * processor is not obligated to read and process their declarations;
7427 * for such documents, the rule that an entity must be declared is a
7428 * well-formedness constraint only if standalone='yes'.
7429 *
7430 * [ WFC: Parsed Entity ]
7431 * An entity reference must not contain the name of an unparsed entity
7432 *
7433 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7434 * is updated to the current location in the string.
7435 */
7436 static xmlEntityPtr
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7437 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7438 xmlChar *name;
7439 const xmlChar *ptr;
7440 xmlChar cur;
7441 xmlEntityPtr ent = NULL;
7442
7443 if ((str == NULL) || (*str == NULL))
7444 return(NULL);
7445 ptr = *str;
7446 cur = *ptr;
7447 if (cur != '&')
7448 return(NULL);
7449
7450 ptr++;
7451 name = xmlParseStringName(ctxt, &ptr);
7452 if (name == NULL) {
7453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7454 "xmlParseStringEntityRef: no name\n");
7455 *str = ptr;
7456 return(NULL);
7457 }
7458 if (*ptr != ';') {
7459 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7460 xmlFree(name);
7461 *str = ptr;
7462 return(NULL);
7463 }
7464 ptr++;
7465
7466
7467 /*
7468 * Predefined entites override any extra definition
7469 */
7470 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7471 ent = xmlGetPredefinedEntity(name);
7472 if (ent != NULL) {
7473 xmlFree(name);
7474 *str = ptr;
7475 return(ent);
7476 }
7477 }
7478
7479 /*
7480 * Increate the number of entity references parsed
7481 */
7482 ctxt->nbentities++;
7483
7484 /*
7485 * Ask first SAX for entity resolution, otherwise try the
7486 * entities which may have stored in the parser context.
7487 */
7488 if (ctxt->sax != NULL) {
7489 if (ctxt->sax->getEntity != NULL)
7490 ent = ctxt->sax->getEntity(ctxt->userData, name);
7491 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7492 ent = xmlGetPredefinedEntity(name);
7493 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7494 ent = xmlSAX2GetEntity(ctxt, name);
7495 }
7496 }
7497
7498 /*
7499 * [ WFC: Entity Declared ]
7500 * In a document without any DTD, a document with only an
7501 * internal DTD subset which contains no parameter entity
7502 * references, or a document with "standalone='yes'", the
7503 * Name given in the entity reference must match that in an
7504 * entity declaration, except that well-formed documents
7505 * need not declare any of the following entities: amp, lt,
7506 * gt, apos, quot.
7507 * The declaration of a parameter entity must precede any
7508 * reference to it.
7509 * Similarly, the declaration of a general entity must
7510 * precede any reference to it which appears in a default
7511 * value in an attribute-list declaration. Note that if
7512 * entities are declared in the external subset or in
7513 * external parameter entities, a non-validating processor
7514 * is not obligated to read and process their declarations;
7515 * for such documents, the rule that an entity must be
7516 * declared is a well-formedness constraint only if
7517 * standalone='yes'.
7518 */
7519 if (ent == NULL) {
7520 if ((ctxt->standalone == 1) ||
7521 ((ctxt->hasExternalSubset == 0) &&
7522 (ctxt->hasPErefs == 0))) {
7523 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7524 "Entity '%s' not defined\n", name);
7525 } else {
7526 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7527 "Entity '%s' not defined\n",
7528 name);
7529 }
7530 /* TODO ? check regressions ctxt->valid = 0; */
7531 }
7532
7533 /*
7534 * [ WFC: Parsed Entity ]
7535 * An entity reference must not contain the name of an
7536 * unparsed entity
7537 */
7538 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7539 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7540 "Entity reference to unparsed entity %s\n", name);
7541 }
7542
7543 /*
7544 * [ WFC: No External Entity References ]
7545 * Attribute values cannot contain direct or indirect
7546 * entity references to external entities.
7547 */
7548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7551 "Attribute references external entity '%s'\n", name);
7552 }
7553 /*
7554 * [ WFC: No < in Attribute Values ]
7555 * The replacement text of any entity referred to directly or
7556 * indirectly in an attribute value (other than "<") must
7557 * not contain a <.
7558 */
7559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7560 (ent != NULL) && (ent->content != NULL) &&
7561 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7562 (xmlStrchr(ent->content, '<'))) {
7563 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7564 "'<' in entity '%s' is not allowed in attributes values\n",
7565 name);
7566 }
7567
7568 /*
7569 * Internal check, no parameter entities here ...
7570 */
7571 else {
7572 switch (ent->etype) {
7573 case XML_INTERNAL_PARAMETER_ENTITY:
7574 case XML_EXTERNAL_PARAMETER_ENTITY:
7575 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7576 "Attempt to reference the parameter entity '%s'\n",
7577 name);
7578 break;
7579 default:
7580 break;
7581 }
7582 }
7583
7584 /*
7585 * [ WFC: No Recursion ]
7586 * A parsed entity must not contain a recursive reference
7587 * to itself, either directly or indirectly.
7588 * Done somewhere else
7589 */
7590
7591 xmlFree(name);
7592 *str = ptr;
7593 return(ent);
7594 }
7595
7596 /**
7597 * xmlParsePEReference:
7598 * @ctxt: an XML parser context
7599 *
7600 * parse PEReference declarations
7601 * The entity content is handled directly by pushing it's content as
7602 * a new input stream.
7603 *
7604 * [69] PEReference ::= '%' Name ';'
7605 *
7606 * [ WFC: No Recursion ]
7607 * A parsed entity must not contain a recursive
7608 * reference to itself, either directly or indirectly.
7609 *
7610 * [ WFC: Entity Declared ]
7611 * In a document without any DTD, a document with only an internal DTD
7612 * subset which contains no parameter entity references, or a document
7613 * with "standalone='yes'", ... ... The declaration of a parameter
7614 * entity must precede any reference to it...
7615 *
7616 * [ VC: Entity Declared ]
7617 * In a document with an external subset or external parameter entities
7618 * with "standalone='no'", ... ... The declaration of a parameter entity
7619 * must precede any reference to it...
7620 *
7621 * [ WFC: In DTD ]
7622 * Parameter-entity references may only appear in the DTD.
7623 * NOTE: misleading but this is handled.
7624 */
7625 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7626 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7627 {
7628 const xmlChar *name;
7629 xmlEntityPtr entity = NULL;
7630 xmlParserInputPtr input;
7631
7632 if (RAW != '%')
7633 return;
7634 NEXT;
7635 name = xmlParseName(ctxt);
7636 if (name == NULL) {
7637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7638 "xmlParsePEReference: no name\n");
7639 return;
7640 }
7641 if (RAW != ';') {
7642 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7643 return;
7644 }
7645
7646 NEXT;
7647
7648 /*
7649 * Increate the number of entity references parsed
7650 */
7651 ctxt->nbentities++;
7652
7653 /*
7654 * Request the entity from SAX
7655 */
7656 if ((ctxt->sax != NULL) &&
7657 (ctxt->sax->getParameterEntity != NULL))
7658 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7659 name);
7660 if (entity == NULL) {
7661 /*
7662 * [ WFC: Entity Declared ]
7663 * In a document without any DTD, a document with only an
7664 * internal DTD subset which contains no parameter entity
7665 * references, or a document with "standalone='yes'", ...
7666 * ... The declaration of a parameter entity must precede
7667 * any reference to it...
7668 */
7669 if ((ctxt->standalone == 1) ||
7670 ((ctxt->hasExternalSubset == 0) &&
7671 (ctxt->hasPErefs == 0))) {
7672 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673 "PEReference: %%%s; not found\n",
7674 name);
7675 } else {
7676 /*
7677 * [ VC: Entity Declared ]
7678 * In a document with an external subset or external
7679 * parameter entities with "standalone='no'", ...
7680 * ... The declaration of a parameter entity must
7681 * precede any reference to it...
7682 */
7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 "PEReference: %%%s; not found\n",
7685 name, NULL);
7686 ctxt->valid = 0;
7687 }
7688 } else {
7689 /*
7690 * Internal checking in case the entity quest barfed
7691 */
7692 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7693 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 "Internal: %%%s; is not a parameter entity\n",
7696 name, NULL);
7697 } else if (ctxt->input->free != deallocblankswrapper) {
7698 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7699 if (xmlPushInput(ctxt, input) < 0)
7700 return;
7701 } else {
7702 /*
7703 * TODO !!!
7704 * handle the extra spaces added before and after
7705 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7706 */
7707 input = xmlNewEntityInputStream(ctxt, entity);
7708 if (xmlPushInput(ctxt, input) < 0)
7709 return;
7710 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7711 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7712 (IS_BLANK_CH(NXT(5)))) {
7713 xmlParseTextDecl(ctxt);
7714 if (ctxt->errNo ==
7715 XML_ERR_UNSUPPORTED_ENCODING) {
7716 /*
7717 * The XML REC instructs us to stop parsing
7718 * right here
7719 */
7720 ctxt->instate = XML_PARSER_EOF;
7721 return;
7722 }
7723 }
7724 }
7725 }
7726 ctxt->hasPErefs = 1;
7727 }
7728
7729 /**
7730 * xmlLoadEntityContent:
7731 * @ctxt: an XML parser context
7732 * @entity: an unloaded system entity
7733 *
7734 * Load the original content of the given system entity from the
7735 * ExternalID/SystemID given. This is to be used for Included in Literal
7736 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7737 *
7738 * Returns 0 in case of success and -1 in case of failure
7739 */
7740 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7741 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7742 xmlParserInputPtr input;
7743 xmlBufferPtr buf;
7744 int l, c;
7745 int count = 0;
7746
7747 if ((ctxt == NULL) || (entity == NULL) ||
7748 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7749 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7750 (entity->content != NULL)) {
7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 "xmlLoadEntityContent parameter error");
7753 return(-1);
7754 }
7755
7756 if (xmlParserDebugEntities)
7757 xmlGenericError(xmlGenericErrorContext,
7758 "Reading %s entity content input\n", entity->name);
7759
7760 buf = xmlBufferCreate();
7761 if (buf == NULL) {
7762 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7763 "xmlLoadEntityContent parameter error");
7764 return(-1);
7765 }
7766
7767 input = xmlNewEntityInputStream(ctxt, entity);
7768 if (input == NULL) {
7769 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7770 "xmlLoadEntityContent input error");
7771 xmlBufferFree(buf);
7772 return(-1);
7773 }
7774
7775 /*
7776 * Push the entity as the current input, read char by char
7777 * saving to the buffer until the end of the entity or an error
7778 */
7779 if (xmlPushInput(ctxt, input) < 0) {
7780 xmlBufferFree(buf);
7781 return(-1);
7782 }
7783
7784 GROW;
7785 c = CUR_CHAR(l);
7786 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7787 (IS_CHAR(c))) {
7788 xmlBufferAdd(buf, ctxt->input->cur, l);
7789 if (count++ > 100) {
7790 count = 0;
7791 GROW;
7792 }
7793 NEXTL(l);
7794 c = CUR_CHAR(l);
7795 }
7796
7797 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7798 xmlPopInput(ctxt);
7799 } else if (!IS_CHAR(c)) {
7800 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7801 "xmlLoadEntityContent: invalid char value %d\n",
7802 c);
7803 xmlBufferFree(buf);
7804 return(-1);
7805 }
7806 entity->content = buf->content;
7807 buf->content = NULL;
7808 xmlBufferFree(buf);
7809
7810 return(0);
7811 }
7812
7813 /**
7814 * xmlParseStringPEReference:
7815 * @ctxt: an XML parser context
7816 * @str: a pointer to an index in the string
7817 *
7818 * parse PEReference declarations
7819 *
7820 * [69] PEReference ::= '%' Name ';'
7821 *
7822 * [ WFC: No Recursion ]
7823 * A parsed entity must not contain a recursive
7824 * reference to itself, either directly or indirectly.
7825 *
7826 * [ WFC: Entity Declared ]
7827 * In a document without any DTD, a document with only an internal DTD
7828 * subset which contains no parameter entity references, or a document
7829 * with "standalone='yes'", ... ... The declaration of a parameter
7830 * entity must precede any reference to it...
7831 *
7832 * [ VC: Entity Declared ]
7833 * In a document with an external subset or external parameter entities
7834 * with "standalone='no'", ... ... The declaration of a parameter entity
7835 * must precede any reference to it...
7836 *
7837 * [ WFC: In DTD ]
7838 * Parameter-entity references may only appear in the DTD.
7839 * NOTE: misleading but this is handled.
7840 *
7841 * Returns the string of the entity content.
7842 * str is updated to the current value of the index
7843 */
7844 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)7845 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7846 const xmlChar *ptr;
7847 xmlChar cur;
7848 xmlChar *name;
7849 xmlEntityPtr entity = NULL;
7850
7851 if ((str == NULL) || (*str == NULL)) return(NULL);
7852 ptr = *str;
7853 cur = *ptr;
7854 if (cur != '%')
7855 return(NULL);
7856 ptr++;
7857 name = xmlParseStringName(ctxt, &ptr);
7858 if (name == NULL) {
7859 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7860 "xmlParseStringPEReference: no name\n");
7861 *str = ptr;
7862 return(NULL);
7863 }
7864 cur = *ptr;
7865 if (cur != ';') {
7866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7867 xmlFree(name);
7868 *str = ptr;
7869 return(NULL);
7870 }
7871 ptr++;
7872
7873 /*
7874 * Increate the number of entity references parsed
7875 */
7876 ctxt->nbentities++;
7877
7878 /*
7879 * Request the entity from SAX
7880 */
7881 if ((ctxt->sax != NULL) &&
7882 (ctxt->sax->getParameterEntity != NULL))
7883 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7884 name);
7885 if (entity == NULL) {
7886 /*
7887 * [ WFC: Entity Declared ]
7888 * In a document without any DTD, a document with only an
7889 * internal DTD subset which contains no parameter entity
7890 * references, or a document with "standalone='yes'", ...
7891 * ... The declaration of a parameter entity must precede
7892 * any reference to it...
7893 */
7894 if ((ctxt->standalone == 1) ||
7895 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7897 "PEReference: %%%s; not found\n", name);
7898 } else {
7899 /*
7900 * [ VC: Entity Declared ]
7901 * In a document with an external subset or external
7902 * parameter entities with "standalone='no'", ...
7903 * ... The declaration of a parameter entity must
7904 * precede any reference to it...
7905 */
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "PEReference: %%%s; not found\n",
7908 name, NULL);
7909 ctxt->valid = 0;
7910 }
7911 } else {
7912 /*
7913 * Internal checking in case the entity quest barfed
7914 */
7915 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7916 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7917 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7918 "%%%s; is not a parameter entity\n",
7919 name, NULL);
7920 }
7921 }
7922 ctxt->hasPErefs = 1;
7923 xmlFree(name);
7924 *str = ptr;
7925 return(entity);
7926 }
7927
7928 /**
7929 * xmlParseDocTypeDecl:
7930 * @ctxt: an XML parser context
7931 *
7932 * parse a DOCTYPE declaration
7933 *
7934 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7935 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7936 *
7937 * [ VC: Root Element Type ]
7938 * The Name in the document type declaration must match the element
7939 * type of the root element.
7940 */
7941
7942 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)7943 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7944 const xmlChar *name = NULL;
7945 xmlChar *ExternalID = NULL;
7946 xmlChar *URI = NULL;
7947
7948 /*
7949 * We know that '<!DOCTYPE' has been detected.
7950 */
7951 SKIP(9);
7952
7953 SKIP_BLANKS;
7954
7955 /*
7956 * Parse the DOCTYPE name.
7957 */
7958 name = xmlParseName(ctxt);
7959 if (name == NULL) {
7960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7961 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7962 }
7963 ctxt->intSubName = name;
7964
7965 SKIP_BLANKS;
7966
7967 /*
7968 * Check for SystemID and ExternalID
7969 */
7970 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7971
7972 if ((URI != NULL) || (ExternalID != NULL)) {
7973 ctxt->hasExternalSubset = 1;
7974 }
7975 ctxt->extSubURI = URI;
7976 ctxt->extSubSystem = ExternalID;
7977
7978 SKIP_BLANKS;
7979
7980 /*
7981 * Create and update the internal subset.
7982 */
7983 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7984 (!ctxt->disableSAX))
7985 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7986
7987 /*
7988 * Is there any internal subset declarations ?
7989 * they are handled separately in xmlParseInternalSubset()
7990 */
7991 if (RAW == '[')
7992 return;
7993
7994 /*
7995 * We should be at the end of the DOCTYPE declaration.
7996 */
7997 if (RAW != '>') {
7998 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7999 }
8000 NEXT;
8001 }
8002
8003 /**
8004 * xmlParseInternalSubset:
8005 * @ctxt: an XML parser context
8006 *
8007 * parse the internal subset declaration
8008 *
8009 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8010 */
8011
8012 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8013 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8014 /*
8015 * Is there any DTD definition ?
8016 */
8017 if (RAW == '[') {
8018 ctxt->instate = XML_PARSER_DTD;
8019 NEXT;
8020 /*
8021 * Parse the succession of Markup declarations and
8022 * PEReferences.
8023 * Subsequence (markupdecl | PEReference | S)*
8024 */
8025 while (RAW != ']') {
8026 const xmlChar *check = CUR_PTR;
8027 unsigned int cons = ctxt->input->consumed;
8028
8029 SKIP_BLANKS;
8030 xmlParseMarkupDecl(ctxt);
8031 xmlParsePEReference(ctxt);
8032
8033 /*
8034 * Pop-up of finished entities.
8035 */
8036 while ((RAW == 0) && (ctxt->inputNr > 1))
8037 xmlPopInput(ctxt);
8038
8039 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8040 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8041 "xmlParseInternalSubset: error detected in Markup declaration\n");
8042 break;
8043 }
8044 }
8045 if (RAW == ']') {
8046 NEXT;
8047 SKIP_BLANKS;
8048 }
8049 }
8050
8051 /*
8052 * We should be at the end of the DOCTYPE declaration.
8053 */
8054 if (RAW != '>') {
8055 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8056 }
8057 NEXT;
8058 }
8059
8060 #ifdef LIBXML_SAX1_ENABLED
8061 /**
8062 * xmlParseAttribute:
8063 * @ctxt: an XML parser context
8064 * @value: a xmlChar ** used to store the value of the attribute
8065 *
8066 * parse an attribute
8067 *
8068 * [41] Attribute ::= Name Eq AttValue
8069 *
8070 * [ WFC: No External Entity References ]
8071 * Attribute values cannot contain direct or indirect entity references
8072 * to external entities.
8073 *
8074 * [ WFC: No < in Attribute Values ]
8075 * The replacement text of any entity referred to directly or indirectly in
8076 * an attribute value (other than "<") must not contain a <.
8077 *
8078 * [ VC: Attribute Value Type ]
8079 * The attribute must have been declared; the value must be of the type
8080 * declared for it.
8081 *
8082 * [25] Eq ::= S? '=' S?
8083 *
8084 * With namespace:
8085 *
8086 * [NS 11] Attribute ::= QName Eq AttValue
8087 *
8088 * Also the case QName == xmlns:??? is handled independently as a namespace
8089 * definition.
8090 *
8091 * Returns the attribute name, and the value in *value.
8092 */
8093
8094 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8095 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8096 const xmlChar *name;
8097 xmlChar *val;
8098
8099 *value = NULL;
8100 GROW;
8101 name = xmlParseName(ctxt);
8102 if (name == NULL) {
8103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8104 "error parsing attribute name\n");
8105 return(NULL);
8106 }
8107
8108 /*
8109 * read the value
8110 */
8111 SKIP_BLANKS;
8112 if (RAW == '=') {
8113 NEXT;
8114 SKIP_BLANKS;
8115 val = xmlParseAttValue(ctxt);
8116 ctxt->instate = XML_PARSER_CONTENT;
8117 } else {
8118 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8119 "Specification mandate value for attribute %s\n", name);
8120 return(NULL);
8121 }
8122
8123 /*
8124 * Check that xml:lang conforms to the specification
8125 * No more registered as an error, just generate a warning now
8126 * since this was deprecated in XML second edition
8127 */
8128 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8129 if (!xmlCheckLanguageID(val)) {
8130 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8131 "Malformed value for xml:lang : %s\n",
8132 val, NULL);
8133 }
8134 }
8135
8136 /*
8137 * Check that xml:space conforms to the specification
8138 */
8139 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8140 if (xmlStrEqual(val, BAD_CAST "default"))
8141 *(ctxt->space) = 0;
8142 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8143 *(ctxt->space) = 1;
8144 else {
8145 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8146 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8147 val, NULL);
8148 }
8149 }
8150
8151 *value = val;
8152 return(name);
8153 }
8154
8155 /**
8156 * xmlParseStartTag:
8157 * @ctxt: an XML parser context
8158 *
8159 * parse a start of tag either for rule element or
8160 * EmptyElement. In both case we don't parse the tag closing chars.
8161 *
8162 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8163 *
8164 * [ WFC: Unique Att Spec ]
8165 * No attribute name may appear more than once in the same start-tag or
8166 * empty-element tag.
8167 *
8168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8169 *
8170 * [ WFC: Unique Att Spec ]
8171 * No attribute name may appear more than once in the same start-tag or
8172 * empty-element tag.
8173 *
8174 * With namespace:
8175 *
8176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8177 *
8178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8179 *
8180 * Returns the element name parsed
8181 */
8182
8183 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8184 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8185 const xmlChar *name;
8186 const xmlChar *attname;
8187 xmlChar *attvalue;
8188 const xmlChar **atts = ctxt->atts;
8189 int nbatts = 0;
8190 int maxatts = ctxt->maxatts;
8191 int i;
8192
8193 if (RAW != '<') return(NULL);
8194 NEXT1;
8195
8196 name = xmlParseName(ctxt);
8197 if (name == NULL) {
8198 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8199 "xmlParseStartTag: invalid element name\n");
8200 return(NULL);
8201 }
8202
8203 /*
8204 * Now parse the attributes, it ends up with the ending
8205 *
8206 * (S Attribute)* S?
8207 */
8208 SKIP_BLANKS;
8209 GROW;
8210
8211 while ((RAW != '>') &&
8212 ((RAW != '/') || (NXT(1) != '>')) &&
8213 (IS_BYTE_CHAR(RAW))) {
8214 const xmlChar *q = CUR_PTR;
8215 unsigned int cons = ctxt->input->consumed;
8216
8217 attname = xmlParseAttribute(ctxt, &attvalue);
8218 if ((attname != NULL) && (attvalue != NULL)) {
8219 /*
8220 * [ WFC: Unique Att Spec ]
8221 * No attribute name may appear more than once in the same
8222 * start-tag or empty-element tag.
8223 */
8224 for (i = 0; i < nbatts;i += 2) {
8225 if (xmlStrEqual(atts[i], attname)) {
8226 xmlErrAttributeDup(ctxt, NULL, attname);
8227 xmlFree(attvalue);
8228 goto failed;
8229 }
8230 }
8231 /*
8232 * Add the pair to atts
8233 */
8234 if (atts == NULL) {
8235 maxatts = 22; /* allow for 10 attrs by default */
8236 atts = (const xmlChar **)
8237 xmlMalloc(maxatts * sizeof(xmlChar *));
8238 if (atts == NULL) {
8239 xmlErrMemory(ctxt, NULL);
8240 if (attvalue != NULL)
8241 xmlFree(attvalue);
8242 goto failed;
8243 }
8244 ctxt->atts = atts;
8245 ctxt->maxatts = maxatts;
8246 } else if (nbatts + 4 > maxatts) {
8247 const xmlChar **n;
8248
8249 maxatts *= 2;
8250 n = (const xmlChar **) xmlRealloc((void *) atts,
8251 maxatts * sizeof(const xmlChar *));
8252 if (n == NULL) {
8253 xmlErrMemory(ctxt, NULL);
8254 if (attvalue != NULL)
8255 xmlFree(attvalue);
8256 goto failed;
8257 }
8258 atts = n;
8259 ctxt->atts = atts;
8260 ctxt->maxatts = maxatts;
8261 }
8262 atts[nbatts++] = attname;
8263 atts[nbatts++] = attvalue;
8264 atts[nbatts] = NULL;
8265 atts[nbatts + 1] = NULL;
8266 } else {
8267 if (attvalue != NULL)
8268 xmlFree(attvalue);
8269 }
8270
8271 failed:
8272
8273 GROW
8274 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8275 break;
8276 if (!IS_BLANK_CH(RAW)) {
8277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8278 "attributes construct error\n");
8279 }
8280 SKIP_BLANKS;
8281 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8282 (attname == NULL) && (attvalue == NULL)) {
8283 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8284 "xmlParseStartTag: problem parsing attributes\n");
8285 break;
8286 }
8287 SHRINK;
8288 GROW;
8289 }
8290
8291 /*
8292 * SAX: Start of Element !
8293 */
8294 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8295 (!ctxt->disableSAX)) {
8296 if (nbatts > 0)
8297 ctxt->sax->startElement(ctxt->userData, name, atts);
8298 else
8299 ctxt->sax->startElement(ctxt->userData, name, NULL);
8300 }
8301
8302 if (atts != NULL) {
8303 /* Free only the content strings */
8304 for (i = 1;i < nbatts;i+=2)
8305 if (atts[i] != NULL)
8306 xmlFree((xmlChar *) atts[i]);
8307 }
8308 return(name);
8309 }
8310
8311 /**
8312 * xmlParseEndTag1:
8313 * @ctxt: an XML parser context
8314 * @line: line of the start tag
8315 * @nsNr: number of namespaces on the start tag
8316 *
8317 * parse an end of tag
8318 *
8319 * [42] ETag ::= '</' Name S? '>'
8320 *
8321 * With namespace
8322 *
8323 * [NS 9] ETag ::= '</' QName S? '>'
8324 */
8325
8326 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8327 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8328 const xmlChar *name;
8329
8330 GROW;
8331 if ((RAW != '<') || (NXT(1) != '/')) {
8332 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8333 "xmlParseEndTag: '</' not found\n");
8334 return;
8335 }
8336 SKIP(2);
8337
8338 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8339
8340 /*
8341 * We should definitely be at the ending "S? '>'" part
8342 */
8343 GROW;
8344 SKIP_BLANKS;
8345 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8346 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8347 } else
8348 NEXT1;
8349
8350 /*
8351 * [ WFC: Element Type Match ]
8352 * The Name in an element's end-tag must match the element type in the
8353 * start-tag.
8354 *
8355 */
8356 if (name != (xmlChar*)1) {
8357 if (name == NULL) name = BAD_CAST "unparseable";
8358 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8359 "Opening and ending tag mismatch: %s line %d and %s\n",
8360 ctxt->name, line, name);
8361 }
8362
8363 /*
8364 * SAX: End of Tag
8365 */
8366 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8367 (!ctxt->disableSAX))
8368 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8369
8370 namePop(ctxt);
8371 spacePop(ctxt);
8372 return;
8373 }
8374
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * Parse an end tag.  This simply calls xmlParseEndTag1() with 0 as the
 * line of the start tag; that line is only used in the message reporting
 * a start/end tag name mismatch.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    xmlParseEndTag1(ctxt, 0);
}
8392 #endif /* LIBXML_SAX1_ENABLED */
8393
8394 /************************************************************************
8395 * *
8396 * SAX 2 specific operations *
8397 * *
8398 ************************************************************************/
8399
8400 /*
8401 * xmlGetNamespace:
8402 * @ctxt: an XML parser context
8403 * @prefix: the prefix to lookup
8404 *
8405 * Lookup the namespace name for the @prefix (which ca be NULL)
8406 * The prefix must come from the @ctxt->dict dictionnary
8407 *
8408 * Returns the namespace name or NULL if not bound
8409 */
8410 static const xmlChar *
xmlGetNamespace(xmlParserCtxtPtr ctxt,const xmlChar * prefix)8411 xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8412 int i;
8413
8414 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8415 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8416 if (ctxt->nsTab[i] == prefix) {
8417 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8418 return(NULL);
8419 return(ctxt->nsTab[i + 1]);
8420 }
8421 return(NULL);
8422 }
8423
8424 /**
8425 * xmlParseQName:
8426 * @ctxt: an XML parser context
8427 * @prefix: pointer to store the prefix part
8428 *
8429 * parse an XML Namespace QName
8430 *
8431 * [6] QName ::= (Prefix ':')? LocalPart
8432 * [7] Prefix ::= NCName
8433 * [8] LocalPart ::= NCName
8434 *
8435 * Returns the Name parsed or NULL
8436 */
8437
8438 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8439 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8440 const xmlChar *l, *p;
8441
8442 GROW;
8443
8444 l = xmlParseNCName(ctxt);
8445 if (l == NULL) {
8446 if (CUR == ':') {
8447 l = xmlParseName(ctxt);
8448 if (l != NULL) {
8449 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8450 "Failed to parse QName '%s'\n", l, NULL, NULL);
8451 *prefix = NULL;
8452 return(l);
8453 }
8454 }
8455 return(NULL);
8456 }
8457 if (CUR == ':') {
8458 NEXT;
8459 p = l;
8460 l = xmlParseNCName(ctxt);
8461 if (l == NULL) {
8462 xmlChar *tmp;
8463
8464 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8465 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8466 l = xmlParseNmtoken(ctxt);
8467 if (l == NULL)
8468 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8469 else {
8470 tmp = xmlBuildQName(l, p, NULL, 0);
8471 xmlFree((char *)l);
8472 }
8473 p = xmlDictLookup(ctxt->dict, tmp, -1);
8474 if (tmp != NULL) xmlFree(tmp);
8475 *prefix = NULL;
8476 return(p);
8477 }
8478 if (CUR == ':') {
8479 xmlChar *tmp;
8480
8481 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8482 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8483 NEXT;
8484 tmp = (xmlChar *) xmlParseName(ctxt);
8485 if (tmp != NULL) {
8486 tmp = xmlBuildQName(tmp, l, NULL, 0);
8487 l = xmlDictLookup(ctxt->dict, tmp, -1);
8488 if (tmp != NULL) xmlFree(tmp);
8489 *prefix = p;
8490 return(l);
8491 }
8492 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8493 l = xmlDictLookup(ctxt->dict, tmp, -1);
8494 if (tmp != NULL) xmlFree(tmp);
8495 *prefix = p;
8496 return(l);
8497 }
8498 *prefix = p;
8499 } else
8500 *prefix = NULL;
8501 return(l);
8502 }
8503
8504 /**
8505 * xmlParseQNameAndCompare:
8506 * @ctxt: an XML parser context
8507 * @name: the localname
8508 * @prefix: the prefix, if any.
8509 *
8510 * parse an XML name and compares for match
8511 * (specialized for endtag parsing)
8512 *
8513 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8514 * and the name for mismatch
8515 */
8516
8517 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8518 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8519 xmlChar const *prefix) {
8520 const xmlChar *cmp;
8521 const xmlChar *in;
8522 const xmlChar *ret;
8523 const xmlChar *prefix2;
8524
8525 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8526
8527 GROW;
8528 in = ctxt->input->cur;
8529
8530 cmp = prefix;
8531 while (*in != 0 && *in == *cmp) {
8532 ++in;
8533 ++cmp;
8534 }
8535 if ((*cmp == 0) && (*in == ':')) {
8536 in++;
8537 cmp = name;
8538 while (*in != 0 && *in == *cmp) {
8539 ++in;
8540 ++cmp;
8541 }
8542 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8543 /* success */
8544 ctxt->input->cur = in;
8545 return((const xmlChar*) 1);
8546 }
8547 }
8548 /*
8549 * all strings coms from the dictionary, equality can be done directly
8550 */
8551 ret = xmlParseQName (ctxt, &prefix2);
8552 if ((ret == name) && (prefix == prefix2))
8553 return((const xmlChar*) 1);
8554 return ret;
8555 }
8556
8557 /**
8558 * xmlParseAttValueInternal:
8559 * @ctxt: an XML parser context
8560 * @len: attribute len result
8561 * @alloc: whether the attribute was reallocated as a new string
8562 * @normalize: if 1 then further non-CDATA normalization must be done
8563 *
8564 * parse a value for an attribute.
8565 * NOTE: if no normalization is needed, the routine will return pointers
8566 * directly from the data buffer.
8567 *
8568 * 3.3.3 Attribute-Value Normalization:
8569 * Before the value of an attribute is passed to the application or
8570 * checked for validity, the XML processor must normalize it as follows:
8571 * - a character reference is processed by appending the referenced
8572 * character to the attribute value
8573 * - an entity reference is processed by recursively processing the
8574 * replacement text of the entity
8575 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8576 * appending #x20 to the normalized value, except that only a single
8577 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8578 * parsed entity or the literal entity value of an internal parsed entity
8579 * - other characters are processed by appending them to the normalized value
8580 * If the declared value is not CDATA, then the XML processor must further
8581 * process the normalized attribute value by discarding any leading and
8582 * trailing space (#x20) characters, and by replacing sequences of space
8583 * (#x20) characters by a single space (#x20) character.
8584 * All attributes for which no declaration has been read should be treated
8585 * by a non-validating parser as if declared CDATA.
8586 *
8587 * Returns the AttValue parsed or NULL. The value has to be freed by the
8588 * caller if it was copied, this can be detected by val[*len] == 0.
8589 */
8590
8591 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * len,int * alloc,int normalize)8592 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8593 int normalize)
8594 {
8595 xmlChar limit = 0;
8596 const xmlChar *in = NULL, *start, *end, *last;
8597 xmlChar *ret = NULL;
8598
8599 GROW;
8600 in = (xmlChar *) CUR_PTR;
8601 if (*in != '"' && *in != '\'') {
8602 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8603 return (NULL);
8604 }
8605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8606
8607 /*
8608 * try to handle in this routine the most common case where no
8609 * allocation of a new string is required and where content is
8610 * pure ASCII.
8611 */
8612 limit = *in++;
8613 end = ctxt->input->end;
8614 start = in;
8615 if (in >= end) {
8616 const xmlChar *oldbase = ctxt->input->base;
8617 GROW;
8618 if (oldbase != ctxt->input->base) {
8619 long delta = ctxt->input->base - oldbase;
8620 start = start + delta;
8621 in = in + delta;
8622 }
8623 end = ctxt->input->end;
8624 }
8625 if (normalize) {
8626 /*
8627 * Skip any leading spaces
8628 */
8629 while ((in < end) && (*in != limit) &&
8630 ((*in == 0x20) || (*in == 0x9) ||
8631 (*in == 0xA) || (*in == 0xD))) {
8632 in++;
8633 start = in;
8634 if (in >= end) {
8635 const xmlChar *oldbase = ctxt->input->base;
8636 GROW;
8637 if (oldbase != ctxt->input->base) {
8638 long delta = ctxt->input->base - oldbase;
8639 start = start + delta;
8640 in = in + delta;
8641 }
8642 end = ctxt->input->end;
8643 }
8644 }
8645 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8646 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8647 if ((*in++ == 0x20) && (*in == 0x20)) break;
8648 if (in >= end) {
8649 const xmlChar *oldbase = ctxt->input->base;
8650 GROW;
8651 if (oldbase != ctxt->input->base) {
8652 long delta = ctxt->input->base - oldbase;
8653 start = start + delta;
8654 in = in + delta;
8655 }
8656 end = ctxt->input->end;
8657 }
8658 }
8659 last = in;
8660 /*
8661 * skip the trailing blanks
8662 */
8663 while ((last[-1] == 0x20) && (last > start)) last--;
8664 while ((in < end) && (*in != limit) &&
8665 ((*in == 0x20) || (*in == 0x9) ||
8666 (*in == 0xA) || (*in == 0xD))) {
8667 in++;
8668 if (in >= end) {
8669 const xmlChar *oldbase = ctxt->input->base;
8670 GROW;
8671 if (oldbase != ctxt->input->base) {
8672 long delta = ctxt->input->base - oldbase;
8673 start = start + delta;
8674 in = in + delta;
8675 last = last + delta;
8676 }
8677 end = ctxt->input->end;
8678 }
8679 }
8680 if (*in != limit) goto need_complex;
8681 } else {
8682 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8683 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8684 in++;
8685 if (in >= end) {
8686 const xmlChar *oldbase = ctxt->input->base;
8687 GROW;
8688 if (oldbase != ctxt->input->base) {
8689 long delta = ctxt->input->base - oldbase;
8690 start = start + delta;
8691 in = in + delta;
8692 }
8693 end = ctxt->input->end;
8694 }
8695 }
8696 last = in;
8697 if (*in != limit) goto need_complex;
8698 }
8699 in++;
8700 if (len != NULL) {
8701 *len = last - start;
8702 ret = (xmlChar *) start;
8703 } else {
8704 if (alloc) *alloc = 1;
8705 ret = xmlStrndup(start, last - start);
8706 }
8707 CUR_PTR = in;
8708 if (alloc) *alloc = 0;
8709 return ret;
8710 need_complex:
8711 if (alloc) *alloc = 1;
8712 return xmlParseAttValueComplex(ctxt, len, normalize);
8713 }
8714
8715 /**
8716 * xmlParseAttribute2:
8717 * @ctxt: an XML parser context
8718 * @pref: the element prefix
8719 * @elem: the element name
8720 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8721 * @value: a xmlChar ** used to store the value of the attribute
8722 * @len: an int * to save the length of the attribute
8723 * @alloc: an int * to indicate if the attribute was allocated
8724 *
8725 * parse an attribute in the new SAX2 framework.
8726 *
8727 * Returns the attribute name, and the value in *value, .
8728 */
8729
8730 static const xmlChar *
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,const xmlChar ** prefix,xmlChar ** value,int * len,int * alloc)8731 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8732 const xmlChar * pref, const xmlChar * elem,
8733 const xmlChar ** prefix, xmlChar ** value,
8734 int *len, int *alloc)
8735 {
8736 const xmlChar *name;
8737 xmlChar *val, *internal_val = NULL;
8738 int normalize = 0;
8739
8740 *value = NULL;
8741 GROW;
8742 name = xmlParseQName(ctxt, prefix);
8743 if (name == NULL) {
8744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8745 "error parsing attribute name\n");
8746 return (NULL);
8747 }
8748
8749 /*
8750 * get the type if needed
8751 */
8752 if (ctxt->attsSpecial != NULL) {
8753 int type;
8754
8755 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8756 pref, elem, *prefix, name);
8757 if (type != 0)
8758 normalize = 1;
8759 }
8760
8761 /*
8762 * read the value
8763 */
8764 SKIP_BLANKS;
8765 if (RAW == '=') {
8766 NEXT;
8767 SKIP_BLANKS;
8768 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8769 if (normalize) {
8770 /*
8771 * Sometimes a second normalisation pass for spaces is needed
8772 * but that only happens if charrefs or entities refernces
8773 * have been used in the attribute value, i.e. the attribute
8774 * value have been extracted in an allocated string already.
8775 */
8776 if (*alloc) {
8777 const xmlChar *val2;
8778
8779 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8780 if ((val2 != NULL) && (val2 != val)) {
8781 xmlFree(val);
8782 val = (xmlChar *) val2;
8783 }
8784 }
8785 }
8786 ctxt->instate = XML_PARSER_CONTENT;
8787 } else {
8788 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8789 "Specification mandate value for attribute %s\n",
8790 name);
8791 return (NULL);
8792 }
8793
8794 if (*prefix == ctxt->str_xml) {
8795 /*
8796 * Check that xml:lang conforms to the specification
8797 * No more registered as an error, just generate a warning now
8798 * since this was deprecated in XML second edition
8799 */
8800 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8801 internal_val = xmlStrndup(val, *len);
8802 if (!xmlCheckLanguageID(internal_val)) {
8803 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8804 "Malformed value for xml:lang : %s\n",
8805 internal_val, NULL);
8806 }
8807 }
8808
8809 /*
8810 * Check that xml:space conforms to the specification
8811 */
8812 if (xmlStrEqual(name, BAD_CAST "space")) {
8813 internal_val = xmlStrndup(val, *len);
8814 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8815 *(ctxt->space) = 0;
8816 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8817 *(ctxt->space) = 1;
8818 else {
8819 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8820 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8821 internal_val, NULL);
8822 }
8823 }
8824 if (internal_val) {
8825 xmlFree(internal_val);
8826 }
8827 }
8828
8829 *value = val;
8830 return (name);
8831 }
8832 /**
8833 * xmlParseStartTag2:
8834 * @ctxt: an XML parser context
8835 *
8836 * parse a start of tag either for rule element or
8837 * EmptyElement. In both case we don't parse the tag closing chars.
8838 * This routine is called when running SAX2 parsing
8839 *
8840 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8841 *
8842 * [ WFC: Unique Att Spec ]
8843 * No attribute name may appear more than once in the same start-tag or
8844 * empty-element tag.
8845 *
8846 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8847 *
8848 * [ WFC: Unique Att Spec ]
8849 * No attribute name may appear more than once in the same start-tag or
8850 * empty-element tag.
8851 *
8852 * With namespace:
8853 *
8854 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8855 *
8856 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8857 *
8858 * Returns the element name parsed
8859 */
8860
8861 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * tlen)8862 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8863 const xmlChar **URI, int *tlen) {
8864 const xmlChar *localname;
8865 const xmlChar *prefix;
8866 const xmlChar *attname;
8867 const xmlChar *aprefix;
8868 const xmlChar *nsname;
8869 xmlChar *attvalue;
8870 const xmlChar **atts = ctxt->atts;
8871 int maxatts = ctxt->maxatts;
8872 int nratts, nbatts, nbdef;
8873 int i, j, nbNs, attval, oldline, oldcol;
8874 const xmlChar *base;
8875 unsigned long cur;
8876 int nsNr = ctxt->nsNr;
8877
8878 if (RAW != '<') return(NULL);
8879 NEXT1;
8880
8881 /*
8882 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8883 * point since the attribute values may be stored as pointers to
8884 * the buffer and calling SHRINK would destroy them !
8885 * The Shrinking is only possible once the full set of attribute
8886 * callbacks have been done.
8887 */
8888 reparse:
8889 SHRINK;
8890 base = ctxt->input->base;
8891 cur = ctxt->input->cur - ctxt->input->base;
8892 oldline = ctxt->input->line;
8893 oldcol = ctxt->input->col;
8894 nbatts = 0;
8895 nratts = 0;
8896 nbdef = 0;
8897 nbNs = 0;
8898 attval = 0;
8899 /* Forget any namespaces added during an earlier parse of this element. */
8900 ctxt->nsNr = nsNr;
8901
8902 localname = xmlParseQName(ctxt, &prefix);
8903 if (localname == NULL) {
8904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8905 "StartTag: invalid element name\n");
8906 return(NULL);
8907 }
8908 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8909
8910 /*
8911 * Now parse the attributes, it ends up with the ending
8912 *
8913 * (S Attribute)* S?
8914 */
8915 SKIP_BLANKS;
8916 GROW;
8917 if (ctxt->input->base != base) goto base_changed;
8918
8919 while ((RAW != '>') &&
8920 ((RAW != '/') || (NXT(1) != '>')) &&
8921 (IS_BYTE_CHAR(RAW))) {
8922 const xmlChar *q = CUR_PTR;
8923 unsigned int cons = ctxt->input->consumed;
8924 int len = -1, alloc = 0;
8925
8926 attname = xmlParseAttribute2(ctxt, prefix, localname,
8927 &aprefix, &attvalue, &len, &alloc);
8928 if (ctxt->input->base != base) {
8929 if ((attvalue != NULL) && (alloc != 0))
8930 xmlFree(attvalue);
8931 attvalue = NULL;
8932 goto base_changed;
8933 }
8934 if ((attname != NULL) && (attvalue != NULL)) {
8935 if (len < 0) len = xmlStrlen(attvalue);
8936 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8937 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8938 xmlURIPtr uri;
8939
8940 if (*URL != 0) {
8941 uri = xmlParseURI((const char *) URL);
8942 if (uri == NULL) {
8943 xmlNsErr(ctxt, XML_WAR_NS_URI,
8944 "xmlns: '%s' is not a valid URI\n",
8945 URL, NULL, NULL);
8946 } else {
8947 if (uri->scheme == NULL) {
8948 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8949 "xmlns: URI %s is not absolute\n",
8950 URL, NULL, NULL);
8951 }
8952 xmlFreeURI(uri);
8953 }
8954 if (URL == ctxt->str_xml_ns) {
8955 if (attname != ctxt->str_xml) {
8956 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8957 "xml namespace URI cannot be the default namespace\n",
8958 NULL, NULL, NULL);
8959 }
8960 goto skip_default_ns;
8961 }
8962 if ((len == 29) &&
8963 (xmlStrEqual(URL,
8964 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8965 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8966 "reuse of the xmlns namespace name is forbidden\n",
8967 NULL, NULL, NULL);
8968 goto skip_default_ns;
8969 }
8970 }
8971 /*
8972 * check that it's not a defined namespace
8973 */
8974 for (j = 1;j <= nbNs;j++)
8975 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8976 break;
8977 if (j <= nbNs)
8978 xmlErrAttributeDup(ctxt, NULL, attname);
8979 else
8980 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8981 skip_default_ns:
8982 if (alloc != 0) xmlFree(attvalue);
8983 SKIP_BLANKS;
8984 continue;
8985 }
8986 if (aprefix == ctxt->str_xmlns) {
8987 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8988 xmlURIPtr uri;
8989
8990 if (attname == ctxt->str_xml) {
8991 if (URL != ctxt->str_xml_ns) {
8992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 "xml namespace prefix mapped to wrong URI\n",
8994 NULL, NULL, NULL);
8995 }
8996 /*
8997 * Do not keep a namespace definition node
8998 */
8999 goto skip_ns;
9000 }
9001 if (URL == ctxt->str_xml_ns) {
9002 if (attname != ctxt->str_xml) {
9003 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9004 "xml namespace URI mapped to wrong prefix\n",
9005 NULL, NULL, NULL);
9006 }
9007 goto skip_ns;
9008 }
9009 if (attname == ctxt->str_xmlns) {
9010 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9011 "redefinition of the xmlns prefix is forbidden\n",
9012 NULL, NULL, NULL);
9013 goto skip_ns;
9014 }
9015 if ((len == 29) &&
9016 (xmlStrEqual(URL,
9017 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9018 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9019 "reuse of the xmlns namespace name is forbidden\n",
9020 NULL, NULL, NULL);
9021 goto skip_ns;
9022 }
9023 if ((URL == NULL) || (URL[0] == 0)) {
9024 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9025 "xmlns:%s: Empty XML namespace is not allowed\n",
9026 attname, NULL, NULL);
9027 goto skip_ns;
9028 } else {
9029 uri = xmlParseURI((const char *) URL);
9030 if (uri == NULL) {
9031 xmlNsErr(ctxt, XML_WAR_NS_URI,
9032 "xmlns:%s: '%s' is not a valid URI\n",
9033 attname, URL, NULL);
9034 } else {
9035 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9036 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9037 "xmlns:%s: URI %s is not absolute\n",
9038 attname, URL, NULL);
9039 }
9040 xmlFreeURI(uri);
9041 }
9042 }
9043
9044 /*
9045 * check that it's not a defined namespace
9046 */
9047 for (j = 1;j <= nbNs;j++)
9048 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9049 break;
9050 if (j <= nbNs)
9051 xmlErrAttributeDup(ctxt, aprefix, attname);
9052 else
9053 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9054 skip_ns:
9055 if (alloc != 0) xmlFree(attvalue);
9056 SKIP_BLANKS;
9057 if (ctxt->input->base != base) goto base_changed;
9058 continue;
9059 }
9060
9061 /*
9062 * Add the pair to atts
9063 */
9064 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9065 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9066 if (attvalue[len] == 0)
9067 xmlFree(attvalue);
9068 goto failed;
9069 }
9070 maxatts = ctxt->maxatts;
9071 atts = ctxt->atts;
9072 }
9073 ctxt->attallocs[nratts++] = alloc;
9074 atts[nbatts++] = attname;
9075 atts[nbatts++] = aprefix;
9076 atts[nbatts++] = NULL; /* the URI will be fetched later */
9077 atts[nbatts++] = attvalue;
9078 attvalue += len;
9079 atts[nbatts++] = attvalue;
9080 /*
9081 * tag if some deallocation is needed
9082 */
9083 if (alloc != 0) attval = 1;
9084 } else {
9085 if ((attvalue != NULL) && (attvalue[len] == 0))
9086 xmlFree(attvalue);
9087 }
9088
9089 failed:
9090
9091 GROW
9092 if (ctxt->input->base != base) goto base_changed;
9093 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9094 break;
9095 if (!IS_BLANK_CH(RAW)) {
9096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9097 "attributes construct error\n");
9098 break;
9099 }
9100 SKIP_BLANKS;
9101 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9102 (attname == NULL) && (attvalue == NULL)) {
9103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9104 "xmlParseStartTag: problem parsing attributes\n");
9105 break;
9106 }
9107 GROW;
9108 if (ctxt->input->base != base) goto base_changed;
9109 }
9110
9111 /*
9112 * The attributes defaulting
9113 */
9114 if (ctxt->attsDefault != NULL) {
9115 xmlDefAttrsPtr defaults;
9116
9117 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9118 if (defaults != NULL) {
9119 for (i = 0;i < defaults->nbAttrs;i++) {
9120 attname = defaults->values[5 * i];
9121 aprefix = defaults->values[5 * i + 1];
9122
9123 /*
9124 * special work for namespaces defaulted defs
9125 */
9126 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9127 /*
9128 * check that it's not a defined namespace
9129 */
9130 for (j = 1;j <= nbNs;j++)
9131 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9132 break;
9133 if (j <= nbNs) continue;
9134
9135 nsname = xmlGetNamespace(ctxt, NULL);
9136 if (nsname != defaults->values[5 * i + 2]) {
9137 if (nsPush(ctxt, NULL,
9138 defaults->values[5 * i + 2]) > 0)
9139 nbNs++;
9140 }
9141 } else if (aprefix == ctxt->str_xmlns) {
9142 /*
9143 * check that it's not a defined namespace
9144 */
9145 for (j = 1;j <= nbNs;j++)
9146 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9147 break;
9148 if (j <= nbNs) continue;
9149
9150 nsname = xmlGetNamespace(ctxt, attname);
9151 if (nsname != defaults->values[2]) {
9152 if (nsPush(ctxt, attname,
9153 defaults->values[5 * i + 2]) > 0)
9154 nbNs++;
9155 }
9156 } else {
9157 /*
9158 * check that it's not a defined attribute
9159 */
9160 for (j = 0;j < nbatts;j+=5) {
9161 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9162 break;
9163 }
9164 if (j < nbatts) continue;
9165
9166 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9167 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9168 return(NULL);
9169 }
9170 maxatts = ctxt->maxatts;
9171 atts = ctxt->atts;
9172 }
9173 atts[nbatts++] = attname;
9174 atts[nbatts++] = aprefix;
9175 if (aprefix == NULL)
9176 atts[nbatts++] = NULL;
9177 else
9178 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9179 atts[nbatts++] = defaults->values[5 * i + 2];
9180 atts[nbatts++] = defaults->values[5 * i + 3];
9181 if ((ctxt->standalone == 1) &&
9182 (defaults->values[5 * i + 4] != NULL)) {
9183 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9184 "standalone: attribute %s on %s defaulted from external subset\n",
9185 attname, localname);
9186 }
9187 nbdef++;
9188 }
9189 }
9190 }
9191 }
9192
9193 /*
9194 * The attributes checkings
9195 */
9196 for (i = 0; i < nbatts;i += 5) {
9197 /*
9198 * The default namespace does not apply to attribute names.
9199 */
9200 if (atts[i + 1] != NULL) {
9201 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9202 if (nsname == NULL) {
9203 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9204 "Namespace prefix %s for %s on %s is not defined\n",
9205 atts[i + 1], atts[i], localname);
9206 }
9207 atts[i + 2] = nsname;
9208 } else
9209 nsname = NULL;
9210 /*
9211 * [ WFC: Unique Att Spec ]
9212 * No attribute name may appear more than once in the same
9213 * start-tag or empty-element tag.
9214 * As extended by the Namespace in XML REC.
9215 */
9216 for (j = 0; j < i;j += 5) {
9217 if (atts[i] == atts[j]) {
9218 if (atts[i+1] == atts[j+1]) {
9219 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9220 break;
9221 }
9222 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9223 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9224 "Namespaced Attribute %s in '%s' redefined\n",
9225 atts[i], nsname, NULL);
9226 break;
9227 }
9228 }
9229 }
9230 }
9231
9232 nsname = xmlGetNamespace(ctxt, prefix);
9233 if ((prefix != NULL) && (nsname == NULL)) {
9234 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9235 "Namespace prefix %s on %s is not defined\n",
9236 prefix, localname, NULL);
9237 }
9238 *pref = prefix;
9239 *URI = nsname;
9240
9241 /*
9242 * SAX: Start of Element !
9243 */
9244 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9245 (!ctxt->disableSAX)) {
9246 if (nbNs > 0)
9247 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9248 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9249 nbatts / 5, nbdef, atts);
9250 else
9251 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9252 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9253 }
9254
9255 /*
9256 * Free up attribute allocated strings if needed
9257 */
9258 if (attval != 0) {
9259 for (i = 3,j = 0; j < nratts;i += 5,j++)
9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261 xmlFree((xmlChar *) atts[i]);
9262 }
9263
9264 return(localname);
9265
9266 base_changed:
9267 /*
9268 * the attribute strings are valid iif the base didn't changed
9269 */
9270 if (attval != 0) {
9271 for (i = 3,j = 0; j < nratts;i += 5,j++)
9272 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9273 xmlFree((xmlChar *) atts[i]);
9274 }
9275 ctxt->input->cur = ctxt->input->base + cur;
9276 ctxt->input->line = oldline;
9277 ctxt->input->col = oldcol;
9278 if (ctxt->wellFormed == 1) {
9279 goto reparse;
9280 }
9281 return(NULL);
9282 }
9283
9284 /**
9285 * xmlParseEndTag2:
9286 * @ctxt: an XML parser context
9287 * @line: line of the start tag
9288 * @nsNr: number of namespaces on the start tag
9289 *
9290 * parse an end of tag
9291 *
9292 * [42] ETag ::= '</' Name S? '>'
9293 *
9294 * With namespace
9295 *
9296 * [NS 9] ETag ::= '</' QName S? '>'
9297 */
9298
9299 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr,int tlen)9300 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9301 const xmlChar *URI, int line, int nsNr, int tlen) {
9302 const xmlChar *name;
9303
9304 GROW;
9305 if ((RAW != '<') || (NXT(1) != '/')) {
9306 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9307 return;
9308 }
9309 SKIP(2);
9310
9311 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9312 if (ctxt->input->cur[tlen] == '>') {
9313 ctxt->input->cur += tlen + 1;
9314 goto done;
9315 }
9316 ctxt->input->cur += tlen;
9317 name = (xmlChar*)1;
9318 } else {
9319 if (prefix == NULL)
9320 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9321 else
9322 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9323 }
9324
9325 /*
9326 * We should definitely be at the ending "S? '>'" part
9327 */
9328 GROW;
9329 SKIP_BLANKS;
9330 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9331 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9332 } else
9333 NEXT1;
9334
9335 /*
9336 * [ WFC: Element Type Match ]
9337 * The Name in an element's end-tag must match the element type in the
9338 * start-tag.
9339 *
9340 */
9341 if (name != (xmlChar*)1) {
9342 if (name == NULL) name = BAD_CAST "unparseable";
9343 if ((line == 0) && (ctxt->node != NULL))
9344 line = ctxt->node->line;
9345 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9346 "Opening and ending tag mismatch: %s line %d and %s\n",
9347 ctxt->name, line, name);
9348 }
9349
9350 /*
9351 * SAX: End of Tag
9352 */
9353 done:
9354 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9355 (!ctxt->disableSAX))
9356 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9357
9358 spacePop(ctxt);
9359 if (nsNr != 0)
9360 nsPop(ctxt, nsNr);
9361 return;
9362 }
9363
9364 /**
9365 * xmlParseCDSect:
9366 * @ctxt: an XML parser context
9367 *
9368 * Parse escaped pure raw content.
9369 *
9370 * [18] CDSect ::= CDStart CData CDEnd
9371 *
9372 * [19] CDStart ::= '<![CDATA['
9373 *
9374 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9375 *
9376 * [21] CDEnd ::= ']]>'
9377 */
9378 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9379 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9380 xmlChar *buf = NULL;
9381 int len = 0;
9382 int size = XML_PARSER_BUFFER_SIZE;
9383 int r, rl;
9384 int s, sl;
9385 int cur, l;
9386 int count = 0;
9387
9388 /* Check 2.6.0 was NXT(0) not RAW */
9389 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9390 SKIP(9);
9391 } else
9392 return;
9393
9394 ctxt->instate = XML_PARSER_CDATA_SECTION;
9395 r = CUR_CHAR(rl);
9396 if (!IS_CHAR(r)) {
9397 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9398 ctxt->instate = XML_PARSER_CONTENT;
9399 return;
9400 }
9401 NEXTL(rl);
9402 s = CUR_CHAR(sl);
9403 if (!IS_CHAR(s)) {
9404 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9405 ctxt->instate = XML_PARSER_CONTENT;
9406 return;
9407 }
9408 NEXTL(sl);
9409 cur = CUR_CHAR(l);
9410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9411 if (buf == NULL) {
9412 xmlErrMemory(ctxt, NULL);
9413 return;
9414 }
9415 while (IS_CHAR(cur) &&
9416 ((r != ']') || (s != ']') || (cur != '>'))) {
9417 if (len + 5 >= size) {
9418 xmlChar *tmp;
9419
9420 size *= 2;
9421 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9422 if (tmp == NULL) {
9423 xmlFree(buf);
9424 xmlErrMemory(ctxt, NULL);
9425 return;
9426 }
9427 buf = tmp;
9428 }
9429 COPY_BUF(rl,buf,len,r);
9430 r = s;
9431 rl = sl;
9432 s = cur;
9433 sl = l;
9434 count++;
9435 if (count > 50) {
9436 GROW;
9437 count = 0;
9438 }
9439 NEXTL(l);
9440 cur = CUR_CHAR(l);
9441 }
9442 buf[len] = 0;
9443 ctxt->instate = XML_PARSER_CONTENT;
9444 if (cur != '>') {
9445 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9446 "CData section not finished\n%.50s\n", buf);
9447 xmlFree(buf);
9448 return;
9449 }
9450 NEXTL(l);
9451
9452 /*
9453 * OK the buffer is to be consumed as cdata.
9454 */
9455 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9456 if (ctxt->sax->cdataBlock != NULL)
9457 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9458 else if (ctxt->sax->characters != NULL)
9459 ctxt->sax->characters(ctxt->userData, buf, len);
9460 }
9461 xmlFree(buf);
9462 }
9463
9464 /**
9465 * xmlParseContent:
9466 * @ctxt: an XML parser context
9467 *
9468 * Parse a content:
9469 *
9470 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9471 */
9472
9473 void
xmlParseContent(xmlParserCtxtPtr ctxt)9474 xmlParseContent(xmlParserCtxtPtr ctxt) {
9475 GROW;
9476 while ((RAW != 0) &&
9477 ((RAW != '<') || (NXT(1) != '/')) &&
9478 (ctxt->instate != XML_PARSER_EOF)) {
9479 const xmlChar *test = CUR_PTR;
9480 unsigned int cons = ctxt->input->consumed;
9481 const xmlChar *cur = ctxt->input->cur;
9482
9483 /*
9484 * First case : a Processing Instruction.
9485 */
9486 if ((*cur == '<') && (cur[1] == '?')) {
9487 xmlParsePI(ctxt);
9488 }
9489
9490 /*
9491 * Second case : a CDSection
9492 */
9493 /* 2.6.0 test was *cur not RAW */
9494 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9495 xmlParseCDSect(ctxt);
9496 }
9497
9498 /*
9499 * Third case : a comment
9500 */
9501 else if ((*cur == '<') && (NXT(1) == '!') &&
9502 (NXT(2) == '-') && (NXT(3) == '-')) {
9503 xmlParseComment(ctxt);
9504 ctxt->instate = XML_PARSER_CONTENT;
9505 }
9506
9507 /*
9508 * Fourth case : a sub-element.
9509 */
9510 else if (*cur == '<') {
9511 xmlParseElement(ctxt);
9512 }
9513
9514 /*
9515 * Fifth case : a reference. If if has not been resolved,
9516 * parsing returns it's Name, create the node
9517 */
9518
9519 else if (*cur == '&') {
9520 xmlParseReference(ctxt);
9521 }
9522
9523 /*
9524 * Last case, text. Note that References are handled directly.
9525 */
9526 else {
9527 xmlParseCharData(ctxt, 0);
9528 }
9529
9530 GROW;
9531 /*
9532 * Pop-up of finished entities.
9533 */
9534 while ((RAW == 0) && (ctxt->inputNr > 1))
9535 xmlPopInput(ctxt);
9536 SHRINK;
9537
9538 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
9539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9540 "detected an error in element content\n");
9541 ctxt->instate = XML_PARSER_EOF;
9542 break;
9543 }
9544 }
9545 }
9546
9547 /**
9548 * xmlParseElement:
9549 * @ctxt: an XML parser context
9550 *
9551 * parse an XML element, this is highly recursive
9552 *
9553 * [39] element ::= EmptyElemTag | STag content ETag
9554 *
9555 * [ WFC: Element Type Match ]
9556 * The Name in an element's end-tag must match the element type in the
9557 * start-tag.
9558 *
9559 */
9560
9561 void
xmlParseElement(xmlParserCtxtPtr ctxt)9562 xmlParseElement(xmlParserCtxtPtr ctxt) {
9563 const xmlChar *name;
9564 const xmlChar *prefix = NULL;
9565 const xmlChar *URI = NULL;
9566 xmlParserNodeInfo node_info;
9567 int line, tlen;
9568 xmlNodePtr ret;
9569 int nsNr = ctxt->nsNr;
9570
9571 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9572 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9573 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9574 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9575 xmlParserMaxDepth);
9576 ctxt->instate = XML_PARSER_EOF;
9577 return;
9578 }
9579
9580 /* Capture start position */
9581 if (ctxt->record_info) {
9582 node_info.begin_pos = ctxt->input->consumed +
9583 (CUR_PTR - ctxt->input->base);
9584 node_info.begin_line = ctxt->input->line;
9585 }
9586
9587 if (ctxt->spaceNr == 0)
9588 spacePush(ctxt, -1);
9589 else if (*ctxt->space == -2)
9590 spacePush(ctxt, -1);
9591 else
9592 spacePush(ctxt, *ctxt->space);
9593
9594 line = ctxt->input->line;
9595 #ifdef LIBXML_SAX1_ENABLED
9596 if (ctxt->sax2)
9597 #endif /* LIBXML_SAX1_ENABLED */
9598 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9599 #ifdef LIBXML_SAX1_ENABLED
9600 else
9601 name = xmlParseStartTag(ctxt);
9602 #endif /* LIBXML_SAX1_ENABLED */
9603 if (ctxt->instate == XML_PARSER_EOF)
9604 return;
9605 if (name == NULL) {
9606 spacePop(ctxt);
9607 return;
9608 }
9609 namePush(ctxt, name);
9610 ret = ctxt->node;
9611
9612 #ifdef LIBXML_VALID_ENABLED
9613 /*
9614 * [ VC: Root Element Type ]
9615 * The Name in the document type declaration must match the element
9616 * type of the root element.
9617 */
9618 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9619 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9620 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9621 #endif /* LIBXML_VALID_ENABLED */
9622
9623 /*
9624 * Check for an Empty Element.
9625 */
9626 if ((RAW == '/') && (NXT(1) == '>')) {
9627 SKIP(2);
9628 if (ctxt->sax2) {
9629 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9630 (!ctxt->disableSAX))
9631 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9632 #ifdef LIBXML_SAX1_ENABLED
9633 } else {
9634 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9635 (!ctxt->disableSAX))
9636 ctxt->sax->endElement(ctxt->userData, name);
9637 #endif /* LIBXML_SAX1_ENABLED */
9638 }
9639 namePop(ctxt);
9640 spacePop(ctxt);
9641 if (nsNr != ctxt->nsNr)
9642 nsPop(ctxt, ctxt->nsNr - nsNr);
9643 if ( ret != NULL && ctxt->record_info ) {
9644 node_info.end_pos = ctxt->input->consumed +
9645 (CUR_PTR - ctxt->input->base);
9646 node_info.end_line = ctxt->input->line;
9647 node_info.node = ret;
9648 xmlParserAddNodeInfo(ctxt, &node_info);
9649 }
9650 return;
9651 }
9652 if (RAW == '>') {
9653 NEXT1;
9654 } else {
9655 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9656 "Couldn't find end of Start Tag %s line %d\n",
9657 name, line, NULL);
9658
9659 /*
9660 * end of parsing of this node.
9661 */
9662 nodePop(ctxt);
9663 namePop(ctxt);
9664 spacePop(ctxt);
9665 if (nsNr != ctxt->nsNr)
9666 nsPop(ctxt, ctxt->nsNr - nsNr);
9667
9668 /*
9669 * Capture end position and add node
9670 */
9671 if ( ret != NULL && ctxt->record_info ) {
9672 node_info.end_pos = ctxt->input->consumed +
9673 (CUR_PTR - ctxt->input->base);
9674 node_info.end_line = ctxt->input->line;
9675 node_info.node = ret;
9676 xmlParserAddNodeInfo(ctxt, &node_info);
9677 }
9678 return;
9679 }
9680
9681 /*
9682 * Parse the content of the element:
9683 */
9684 xmlParseContent(ctxt);
9685 if (!IS_BYTE_CHAR(RAW)) {
9686 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9687 "Premature end of data in tag %s line %d\n",
9688 name, line, NULL);
9689
9690 /*
9691 * end of parsing of this node.
9692 */
9693 nodePop(ctxt);
9694 namePop(ctxt);
9695 spacePop(ctxt);
9696 if (nsNr != ctxt->nsNr)
9697 nsPop(ctxt, ctxt->nsNr - nsNr);
9698 return;
9699 }
9700
9701 /*
9702 * parse the end of tag: '</' should be here.
9703 */
9704 if (ctxt->sax2) {
9705 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
9706 namePop(ctxt);
9707 }
9708 #ifdef LIBXML_SAX1_ENABLED
9709 else
9710 xmlParseEndTag1(ctxt, line);
9711 #endif /* LIBXML_SAX1_ENABLED */
9712
9713 /*
9714 * Capture end position and add node
9715 */
9716 if ( ret != NULL && ctxt->record_info ) {
9717 node_info.end_pos = ctxt->input->consumed +
9718 (CUR_PTR - ctxt->input->base);
9719 node_info.end_line = ctxt->input->line;
9720 node_info.node = ret;
9721 xmlParserAddNodeInfo(ctxt, &node_info);
9722 }
9723 }
9724
9725 /**
9726 * xmlParseVersionNum:
9727 * @ctxt: an XML parser context
9728 *
9729 * parse the XML version value.
9730 *
9731 * [26] VersionNum ::= '1.' [0-9]+
9732 *
9733 * In practice allow [0-9].[0-9]+ at that level
9734 *
9735 * Returns the string giving the XML version number, or NULL
9736 */
9737 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)9738 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9739 xmlChar *buf = NULL;
9740 int len = 0;
9741 int size = 10;
9742 xmlChar cur;
9743
9744 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9745 if (buf == NULL) {
9746 xmlErrMemory(ctxt, NULL);
9747 return(NULL);
9748 }
9749 cur = CUR;
9750 if (!((cur >= '0') && (cur <= '9'))) {
9751 xmlFree(buf);
9752 return(NULL);
9753 }
9754 buf[len++] = cur;
9755 NEXT;
9756 cur=CUR;
9757 if (cur != '.') {
9758 xmlFree(buf);
9759 return(NULL);
9760 }
9761 buf[len++] = cur;
9762 NEXT;
9763 cur=CUR;
9764 while ((cur >= '0') && (cur <= '9')) {
9765 if (len + 1 >= size) {
9766 xmlChar *tmp;
9767
9768 size *= 2;
9769 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9770 if (tmp == NULL) {
9771 xmlFree(buf);
9772 xmlErrMemory(ctxt, NULL);
9773 return(NULL);
9774 }
9775 buf = tmp;
9776 }
9777 buf[len++] = cur;
9778 NEXT;
9779 cur=CUR;
9780 }
9781 buf[len] = 0;
9782 return(buf);
9783 }
9784
9785 /**
9786 * xmlParseVersionInfo:
9787 * @ctxt: an XML parser context
9788 *
9789 * parse the XML version.
9790 *
9791 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9792 *
9793 * [25] Eq ::= S? '=' S?
9794 *
9795 * Returns the version string, e.g. "1.0"
9796 */
9797
9798 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)9799 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9800 xmlChar *version = NULL;
9801
9802 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9803 SKIP(7);
9804 SKIP_BLANKS;
9805 if (RAW != '=') {
9806 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9807 return(NULL);
9808 }
9809 NEXT;
9810 SKIP_BLANKS;
9811 if (RAW == '"') {
9812 NEXT;
9813 version = xmlParseVersionNum(ctxt);
9814 if (RAW != '"') {
9815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9816 } else
9817 NEXT;
9818 } else if (RAW == '\''){
9819 NEXT;
9820 version = xmlParseVersionNum(ctxt);
9821 if (RAW != '\'') {
9822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9823 } else
9824 NEXT;
9825 } else {
9826 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9827 }
9828 }
9829 return(version);
9830 }
9831
9832 /**
9833 * xmlParseEncName:
9834 * @ctxt: an XML parser context
9835 *
9836 * parse the XML encoding name
9837 *
9838 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9839 *
9840 * Returns the encoding name value or NULL
9841 */
9842 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)9843 xmlParseEncName(xmlParserCtxtPtr ctxt) {
9844 xmlChar *buf = NULL;
9845 int len = 0;
9846 int size = 10;
9847 xmlChar cur;
9848
9849 cur = CUR;
9850 if (((cur >= 'a') && (cur <= 'z')) ||
9851 ((cur >= 'A') && (cur <= 'Z'))) {
9852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9853 if (buf == NULL) {
9854 xmlErrMemory(ctxt, NULL);
9855 return(NULL);
9856 }
9857
9858 buf[len++] = cur;
9859 NEXT;
9860 cur = CUR;
9861 while (((cur >= 'a') && (cur <= 'z')) ||
9862 ((cur >= 'A') && (cur <= 'Z')) ||
9863 ((cur >= '0') && (cur <= '9')) ||
9864 (cur == '.') || (cur == '_') ||
9865 (cur == '-')) {
9866 if (len + 1 >= size) {
9867 xmlChar *tmp;
9868
9869 size *= 2;
9870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9871 if (tmp == NULL) {
9872 xmlErrMemory(ctxt, NULL);
9873 xmlFree(buf);
9874 return(NULL);
9875 }
9876 buf = tmp;
9877 }
9878 buf[len++] = cur;
9879 NEXT;
9880 cur = CUR;
9881 if (cur == 0) {
9882 SHRINK;
9883 GROW;
9884 cur = CUR;
9885 }
9886 }
9887 buf[len] = 0;
9888 } else {
9889 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9890 }
9891 return(buf);
9892 }
9893
9894 /**
9895 * xmlParseEncodingDecl:
9896 * @ctxt: an XML parser context
9897 *
9898 * parse the XML encoding declaration
9899 *
9900 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9901 *
9902 * this setups the conversion filters.
9903 *
9904 * Returns the encoding value or NULL
9905 */
9906
9907 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)9908 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9909 xmlChar *encoding = NULL;
9910
9911 SKIP_BLANKS;
9912 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9913 SKIP(8);
9914 SKIP_BLANKS;
9915 if (RAW != '=') {
9916 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9917 return(NULL);
9918 }
9919 NEXT;
9920 SKIP_BLANKS;
9921 if (RAW == '"') {
9922 NEXT;
9923 encoding = xmlParseEncName(ctxt);
9924 if (RAW != '"') {
9925 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9926 } else
9927 NEXT;
9928 } else if (RAW == '\''){
9929 NEXT;
9930 encoding = xmlParseEncName(ctxt);
9931 if (RAW != '\'') {
9932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9933 } else
9934 NEXT;
9935 } else {
9936 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9937 }
9938
9939 /*
9940 * Non standard parsing, allowing the user to ignore encoding
9941 */
9942 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9943 return(encoding);
9944
9945 /*
9946 * UTF-16 encoding stwich has already taken place at this stage,
9947 * more over the little-endian/big-endian selection is already done
9948 */
9949 if ((encoding != NULL) &&
9950 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9951 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9952 /*
9953 * If no encoding was passed to the parser, that we are
9954 * using UTF-16 and no decoder is present i.e. the
9955 * document is apparently UTF-8 compatible, then raise an
9956 * encoding mismatch fatal error
9957 */
9958 if ((ctxt->encoding == NULL) &&
9959 (ctxt->input->buf != NULL) &&
9960 (ctxt->input->buf->encoder == NULL)) {
9961 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9962 "Document labelled UTF-16 but has UTF-8 content\n");
9963 }
9964 if (ctxt->encoding != NULL)
9965 xmlFree((xmlChar *) ctxt->encoding);
9966 ctxt->encoding = encoding;
9967 }
9968 /*
9969 * UTF-8 encoding is handled natively
9970 */
9971 else if ((encoding != NULL) &&
9972 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9973 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9974 if (ctxt->encoding != NULL)
9975 xmlFree((xmlChar *) ctxt->encoding);
9976 ctxt->encoding = encoding;
9977 }
9978 else if (encoding != NULL) {
9979 xmlCharEncodingHandlerPtr handler;
9980
9981 if (ctxt->input->encoding != NULL)
9982 xmlFree((xmlChar *) ctxt->input->encoding);
9983 ctxt->input->encoding = encoding;
9984
9985 handler = xmlFindCharEncodingHandler((const char *) encoding);
9986 if (handler != NULL) {
9987 xmlSwitchToEncoding(ctxt, handler);
9988 } else {
9989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9990 "Unsupported encoding %s\n", encoding);
9991 return(NULL);
9992 }
9993 }
9994 }
9995 return(encoding);
9996 }
9997
9998 /**
9999 * xmlParseSDDecl:
10000 * @ctxt: an XML parser context
10001 *
10002 * parse the XML standalone declaration
10003 *
10004 * [32] SDDecl ::= S 'standalone' Eq
10005 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10006 *
10007 * [ VC: Standalone Document Declaration ]
10008 * TODO The standalone document declaration must have the value "no"
10009 * if any external markup declarations contain declarations of:
10010 * - attributes with default values, if elements to which these
10011 * attributes apply appear in the document without specifications
10012 * of values for these attributes, or
10013 * - entities (other than amp, lt, gt, apos, quot), if references
10014 * to those entities appear in the document, or
10015 * - attributes with values subject to normalization, where the
10016 * attribute appears in the document with a value which will change
10017 * as a result of normalization, or
10018 * - element types with element content, if white space occurs directly
10019 * within any instance of those types.
10020 *
10021 * Returns:
10022 * 1 if standalone="yes"
10023 * 0 if standalone="no"
10024 * -2 if standalone attribute is missing or invalid
10025 * (A standalone value of -2 means that the XML declaration was found,
10026 * but no value was specified for the standalone attribute).
10027 */
10028
10029 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10030 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10031 int standalone = -2;
10032
10033 SKIP_BLANKS;
10034 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10035 SKIP(10);
10036 SKIP_BLANKS;
10037 if (RAW != '=') {
10038 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10039 return(standalone);
10040 }
10041 NEXT;
10042 SKIP_BLANKS;
10043 if (RAW == '\''){
10044 NEXT;
10045 if ((RAW == 'n') && (NXT(1) == 'o')) {
10046 standalone = 0;
10047 SKIP(2);
10048 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10049 (NXT(2) == 's')) {
10050 standalone = 1;
10051 SKIP(3);
10052 } else {
10053 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10054 }
10055 if (RAW != '\'') {
10056 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10057 } else
10058 NEXT;
10059 } else if (RAW == '"'){
10060 NEXT;
10061 if ((RAW == 'n') && (NXT(1) == 'o')) {
10062 standalone = 0;
10063 SKIP(2);
10064 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10065 (NXT(2) == 's')) {
10066 standalone = 1;
10067 SKIP(3);
10068 } else {
10069 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10070 }
10071 if (RAW != '"') {
10072 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10073 } else
10074 NEXT;
10075 } else {
10076 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10077 }
10078 }
10079 return(standalone);
10080 }
10081
10082 /**
10083 * xmlParseXMLDecl:
10084 * @ctxt: an XML parser context
10085 *
10086 * parse an XML declaration header
10087 *
10088 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10089 */
10090
10091 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10092 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10093 xmlChar *version;
10094
10095 /*
10096 * This value for standalone indicates that the document has an
10097 * XML declaration but it does not have a standalone attribute.
10098 * It will be overwritten later if a standalone attribute is found.
10099 */
10100 ctxt->input->standalone = -2;
10101
10102 /*
10103 * We know that '<?xml' is here.
10104 */
10105 SKIP(5);
10106
10107 if (!IS_BLANK_CH(RAW)) {
10108 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10109 "Blank needed after '<?xml'\n");
10110 }
10111 SKIP_BLANKS;
10112
10113 /*
10114 * We must have the VersionInfo here.
10115 */
10116 version = xmlParseVersionInfo(ctxt);
10117 if (version == NULL) {
10118 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10119 } else {
10120 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10121 /*
10122 * Changed here for XML-1.0 5th edition
10123 */
10124 if (ctxt->options & XML_PARSE_OLD10) {
10125 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10126 "Unsupported version '%s'\n",
10127 version);
10128 } else {
10129 if ((version[0] == '1') && ((version[1] == '.'))) {
10130 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10131 "Unsupported version '%s'\n",
10132 version, NULL);
10133 } else {
10134 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10135 "Unsupported version '%s'\n",
10136 version);
10137 }
10138 }
10139 }
10140 if (ctxt->version != NULL)
10141 xmlFree((void *) ctxt->version);
10142 ctxt->version = version;
10143 }
10144
10145 /*
10146 * We may have the encoding declaration
10147 */
10148 if (!IS_BLANK_CH(RAW)) {
10149 if ((RAW == '?') && (NXT(1) == '>')) {
10150 SKIP(2);
10151 return;
10152 }
10153 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10154 }
10155 xmlParseEncodingDecl(ctxt);
10156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10157 /*
10158 * The XML REC instructs us to stop parsing right here
10159 */
10160 return;
10161 }
10162
10163 /*
10164 * We may have the standalone status.
10165 */
10166 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10167 if ((RAW == '?') && (NXT(1) == '>')) {
10168 SKIP(2);
10169 return;
10170 }
10171 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10172 }
10173
10174 /*
10175 * We can grow the input buffer freely at that point
10176 */
10177 GROW;
10178
10179 SKIP_BLANKS;
10180 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10181
10182 SKIP_BLANKS;
10183 if ((RAW == '?') && (NXT(1) == '>')) {
10184 SKIP(2);
10185 } else if (RAW == '>') {
10186 /* Deprecated old WD ... */
10187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10188 NEXT;
10189 } else {
10190 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10191 MOVETO_ENDTAG(CUR_PTR);
10192 NEXT;
10193 }
10194 }
10195
10196 /**
10197 * xmlParseMisc:
10198 * @ctxt: an XML parser context
10199 *
10200 * parse an XML Misc* optional field.
10201 *
10202 * [27] Misc ::= Comment | PI | S
10203 */
10204
10205 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10206 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10207 while (((RAW == '<') && (NXT(1) == '?')) ||
10208 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10209 IS_BLANK_CH(CUR)) {
10210 if ((RAW == '<') && (NXT(1) == '?')) {
10211 xmlParsePI(ctxt);
10212 } else if (IS_BLANK_CH(CUR)) {
10213 NEXT;
10214 } else
10215 xmlParseComment(ctxt);
10216 }
10217 }
10218
10219 /**
10220 * xmlParseDocument:
10221 * @ctxt: an XML parser context
10222 *
10223 * parse an XML document (and build a tree if using the standard SAX
10224 * interface).
10225 *
10226 * [1] document ::= prolog element Misc*
10227 *
10228 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10229 *
10230 * Returns 0, -1 in case of error. the parser context is augmented
10231 * as a result of the parsing.
10232 */
10233
10234 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10235 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10236 xmlChar start[4];
10237 xmlCharEncoding enc;
10238
10239 xmlInitParser();
10240
10241 if ((ctxt == NULL) || (ctxt->input == NULL))
10242 return(-1);
10243
10244 GROW;
10245
10246 /*
10247 * SAX: detecting the level.
10248 */
10249 xmlDetectSAX2(ctxt);
10250
10251 /*
10252 * SAX: beginning of the document processing.
10253 */
10254 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10255 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10256
10257 if ((ctxt->encoding == NULL) &&
10258 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10259 /*
10260 * Get the 4 first bytes and decode the charset
10261 * if enc != XML_CHAR_ENCODING_NONE
10262 * plug some encoding conversion routines.
10263 */
10264 start[0] = RAW;
10265 start[1] = NXT(1);
10266 start[2] = NXT(2);
10267 start[3] = NXT(3);
10268 enc = xmlDetectCharEncoding(&start[0], 4);
10269 if (enc != XML_CHAR_ENCODING_NONE) {
10270 xmlSwitchEncoding(ctxt, enc);
10271 }
10272 }
10273
10274
10275 if (CUR == 0) {
10276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10277 }
10278
10279 /*
10280 * Check for the XMLDecl in the Prolog.
10281 * do not GROW here to avoid the detected encoder to decode more
10282 * than just the first line, unless the amount of data is really
10283 * too small to hold "<?xml version="1.0" encoding="foo"
10284 */
10285 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10286 GROW;
10287 }
10288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10289
10290 /*
10291 * Note that we will switch encoding on the fly.
10292 */
10293 xmlParseXMLDecl(ctxt);
10294 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10295 /*
10296 * The XML REC instructs us to stop parsing right here
10297 */
10298 return(-1);
10299 }
10300 ctxt->standalone = ctxt->input->standalone;
10301 SKIP_BLANKS;
10302 } else {
10303 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10304 }
10305 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10306 ctxt->sax->startDocument(ctxt->userData);
10307
10308 /*
10309 * The Misc part of the Prolog
10310 */
10311 GROW;
10312 xmlParseMisc(ctxt);
10313
10314 /*
10315 * Then possibly doc type declaration(s) and more Misc
10316 * (doctypedecl Misc*)?
10317 */
10318 GROW;
10319 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10320
10321 ctxt->inSubset = 1;
10322 xmlParseDocTypeDecl(ctxt);
10323 if (RAW == '[') {
10324 ctxt->instate = XML_PARSER_DTD;
10325 xmlParseInternalSubset(ctxt);
10326 }
10327
10328 /*
10329 * Create and update the external subset.
10330 */
10331 ctxt->inSubset = 2;
10332 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10333 (!ctxt->disableSAX))
10334 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10335 ctxt->extSubSystem, ctxt->extSubURI);
10336 ctxt->inSubset = 0;
10337
10338 xmlCleanSpecialAttr(ctxt);
10339
10340 ctxt->instate = XML_PARSER_PROLOG;
10341 xmlParseMisc(ctxt);
10342 }
10343
10344 /*
10345 * Time to start parsing the tree itself
10346 */
10347 GROW;
10348 if (RAW != '<') {
10349 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10350 "Start tag expected, '<' not found\n");
10351 } else {
10352 ctxt->instate = XML_PARSER_CONTENT;
10353 xmlParseElement(ctxt);
10354 ctxt->instate = XML_PARSER_EPILOG;
10355
10356
10357 /*
10358 * The Misc part at the end
10359 */
10360 xmlParseMisc(ctxt);
10361
10362 if (RAW != 0) {
10363 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10364 }
10365 ctxt->instate = XML_PARSER_EOF;
10366 }
10367
10368 /*
10369 * SAX: end of the document processing.
10370 */
10371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10372 ctxt->sax->endDocument(ctxt->userData);
10373
10374 /*
10375 * Remove locally kept entity definitions if the tree was not built
10376 */
10377 if ((ctxt->myDoc != NULL) &&
10378 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10379 xmlFreeDoc(ctxt->myDoc);
10380 ctxt->myDoc = NULL;
10381 }
10382
10383 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10384 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10385 if (ctxt->valid)
10386 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10387 if (ctxt->nsWellFormed)
10388 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10389 if (ctxt->options & XML_PARSE_OLD10)
10390 ctxt->myDoc->properties |= XML_DOC_OLD10;
10391 }
10392 if (! ctxt->wellFormed) {
10393 ctxt->valid = 0;
10394 return(-1);
10395 }
10396 return(0);
10397 }
10398
10399 /**
10400 * xmlParseExtParsedEnt:
10401 * @ctxt: an XML parser context
10402 *
10403 * parse a general parsed entity
10404 * An external general parsed entity is well-formed if it matches the
10405 * production labeled extParsedEnt.
10406 *
10407 * [78] extParsedEnt ::= TextDecl? content
10408 *
10409 * Returns 0, -1 in case of error. the parser context is augmented
10410 * as a result of the parsing.
10411 */
10412
10413 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10414 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10415 xmlChar start[4];
10416 xmlCharEncoding enc;
10417
10418 if ((ctxt == NULL) || (ctxt->input == NULL))
10419 return(-1);
10420
10421 xmlDefaultSAXHandlerInit();
10422
10423 xmlDetectSAX2(ctxt);
10424
10425 GROW;
10426
10427 /*
10428 * SAX: beginning of the document processing.
10429 */
10430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10431 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10432
10433 /*
10434 * Get the 4 first bytes and decode the charset
10435 * if enc != XML_CHAR_ENCODING_NONE
10436 * plug some encoding conversion routines.
10437 */
10438 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10439 start[0] = RAW;
10440 start[1] = NXT(1);
10441 start[2] = NXT(2);
10442 start[3] = NXT(3);
10443 enc = xmlDetectCharEncoding(start, 4);
10444 if (enc != XML_CHAR_ENCODING_NONE) {
10445 xmlSwitchEncoding(ctxt, enc);
10446 }
10447 }
10448
10449
10450 if (CUR == 0) {
10451 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10452 }
10453
10454 /*
10455 * Check for the XMLDecl in the Prolog.
10456 */
10457 GROW;
10458 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10459
10460 /*
10461 * Note that we will switch encoding on the fly.
10462 */
10463 xmlParseXMLDecl(ctxt);
10464 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10465 /*
10466 * The XML REC instructs us to stop parsing right here
10467 */
10468 return(-1);
10469 }
10470 SKIP_BLANKS;
10471 } else {
10472 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10473 }
10474 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10475 ctxt->sax->startDocument(ctxt->userData);
10476
10477 /*
10478 * Doing validity checking on chunk doesn't make sense
10479 */
10480 ctxt->instate = XML_PARSER_CONTENT;
10481 ctxt->validate = 0;
10482 ctxt->loadsubset = 0;
10483 ctxt->depth = 0;
10484
10485 xmlParseContent(ctxt);
10486
10487 if ((RAW == '<') && (NXT(1) == '/')) {
10488 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10489 } else if (RAW != 0) {
10490 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10491 }
10492
10493 /*
10494 * SAX: end of the document processing.
10495 */
10496 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10497 ctxt->sax->endDocument(ctxt->userData);
10498
10499 if (! ctxt->wellFormed) return(-1);
10500 return(0);
10501 }
10502
10503 #ifdef LIBXML_PUSH_ENABLED
10504 /************************************************************************
10505 * *
10506 * Progressive parsing interfaces *
10507 * *
10508 ************************************************************************/
10509
10510 /**
10511 * xmlParseLookupSequence:
10512 * @ctxt: an XML parser context
10513 * @first: the first char to lookup
10514 * @next: the next char to lookup or zero
10515 * @third: the next char to lookup or zero
10516 *
10517 * Try to find if a sequence (first, next, third) or just (first next) or
10518 * (first) is available in the input stream.
10519 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10520 * to avoid rescanning sequences of bytes, it DOES change the state of the
10521 * parser, do not use liberally.
10522 *
10523 * Returns the index to the current parsing point if the full sequence
10524 * is available, -1 otherwise.
10525 */
10526 static int
xmlParseLookupSequence(xmlParserCtxtPtr ctxt,xmlChar first,xmlChar next,xmlChar third)10527 xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10528 xmlChar next, xmlChar third) {
10529 int base, len;
10530 xmlParserInputPtr in;
10531 const xmlChar *buf;
10532
10533 in = ctxt->input;
10534 if (in == NULL) return(-1);
10535 base = in->cur - in->base;
10536 if (base < 0) return(-1);
10537 if (ctxt->checkIndex > base)
10538 base = ctxt->checkIndex;
10539 if (in->buf == NULL) {
10540 buf = in->base;
10541 len = in->length;
10542 } else {
10543 buf = in->buf->buffer->content;
10544 len = in->buf->buffer->use;
10545 }
10546 /* take into account the sequence length */
10547 if (third) len -= 2;
10548 else if (next) len --;
10549 for (;base < len;base++) {
10550 if (buf[base] == first) {
10551 if (third != 0) {
10552 if ((buf[base + 1] != next) ||
10553 (buf[base + 2] != third)) continue;
10554 } else if (next != 0) {
10555 if (buf[base + 1] != next) continue;
10556 }
10557 ctxt->checkIndex = 0;
10558 #ifdef DEBUG_PUSH
10559 if (next == 0)
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: lookup '%c' found at %d\n",
10562 first, base);
10563 else if (third == 0)
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: lookup '%c%c' found at %d\n",
10566 first, next, base);
10567 else
10568 xmlGenericError(xmlGenericErrorContext,
10569 "PP: lookup '%c%c%c' found at %d\n",
10570 first, next, third, base);
10571 #endif
10572 return(base - (in->cur - in->base));
10573 }
10574 }
10575 ctxt->checkIndex = base;
10576 #ifdef DEBUG_PUSH
10577 if (next == 0)
10578 xmlGenericError(xmlGenericErrorContext,
10579 "PP: lookup '%c' failed\n", first);
10580 else if (third == 0)
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: lookup '%c%c' failed\n", first, next);
10583 else
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: lookup '%c%c%c' failed\n", first, next, third);
10586 #endif
10587 return(-1);
10588 }
10589
10590 /**
10591 * xmlParseGetLasts:
10592 * @ctxt: an XML parser context
10593 * @lastlt: pointer to store the last '<' from the input
10594 * @lastgt: pointer to store the last '>' from the input
10595 *
10596 * Lookup the last < and > in the current chunk
10597 */
10598 static void
xmlParseGetLasts(xmlParserCtxtPtr ctxt,const xmlChar ** lastlt,const xmlChar ** lastgt)10599 xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10600 const xmlChar **lastgt) {
10601 const xmlChar *tmp;
10602
10603 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10604 xmlGenericError(xmlGenericErrorContext,
10605 "Internal error: xmlParseGetLasts\n");
10606 return;
10607 }
10608 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
10609 tmp = ctxt->input->end;
10610 tmp--;
10611 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
10612 if (tmp < ctxt->input->base) {
10613 *lastlt = NULL;
10614 *lastgt = NULL;
10615 } else {
10616 *lastlt = tmp;
10617 tmp++;
10618 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10619 if (*tmp == '\'') {
10620 tmp++;
10621 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10622 if (tmp < ctxt->input->end) tmp++;
10623 } else if (*tmp == '"') {
10624 tmp++;
10625 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10626 if (tmp < ctxt->input->end) tmp++;
10627 } else
10628 tmp++;
10629 }
10630 if (tmp < ctxt->input->end)
10631 *lastgt = tmp;
10632 else {
10633 tmp = *lastlt;
10634 tmp--;
10635 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10636 if (tmp >= ctxt->input->base)
10637 *lastgt = tmp;
10638 else
10639 *lastgt = NULL;
10640 }
10641 }
10642 } else {
10643 *lastlt = NULL;
10644 *lastgt = NULL;
10645 }
10646 }
10647 /**
10648 * xmlCheckCdataPush:
10649 * @cur: pointer to the bock of characters
10650 * @len: length of the block in bytes
10651 *
10652 * Check that the block of characters is okay as SCdata content [20]
10653 *
10654 * Returns the number of bytes to pass if okay, a negative index where an
10655 * UTF-8 error occured otherwise
10656 */
10657 static int
xmlCheckCdataPush(const xmlChar * utf,int len)10658 xmlCheckCdataPush(const xmlChar *utf, int len) {
10659 int ix;
10660 unsigned char c;
10661 int codepoint;
10662
10663 if ((utf == NULL) || (len <= 0))
10664 return(0);
10665
10666 for (ix = 0; ix < len;) { /* string is 0-terminated */
10667 c = utf[ix];
10668 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10669 if (c >= 0x20)
10670 ix++;
10671 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10672 ix++;
10673 else
10674 return(-ix);
10675 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10676 if (ix + 2 > len) return(ix);
10677 if ((utf[ix+1] & 0xc0 ) != 0x80)
10678 return(-ix);
10679 codepoint = (utf[ix] & 0x1f) << 6;
10680 codepoint |= utf[ix+1] & 0x3f;
10681 if (!xmlIsCharQ(codepoint))
10682 return(-ix);
10683 ix += 2;
10684 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10685 if (ix + 3 > len) return(ix);
10686 if (((utf[ix+1] & 0xc0) != 0x80) ||
10687 ((utf[ix+2] & 0xc0) != 0x80))
10688 return(-ix);
10689 codepoint = (utf[ix] & 0xf) << 12;
10690 codepoint |= (utf[ix+1] & 0x3f) << 6;
10691 codepoint |= utf[ix+2] & 0x3f;
10692 if (!xmlIsCharQ(codepoint))
10693 return(-ix);
10694 ix += 3;
10695 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10696 if (ix + 4 > len) return(ix);
10697 if (((utf[ix+1] & 0xc0) != 0x80) ||
10698 ((utf[ix+2] & 0xc0) != 0x80) ||
10699 ((utf[ix+3] & 0xc0) != 0x80))
10700 return(-ix);
10701 codepoint = (utf[ix] & 0x7) << 18;
10702 codepoint |= (utf[ix+1] & 0x3f) << 12;
10703 codepoint |= (utf[ix+2] & 0x3f) << 6;
10704 codepoint |= utf[ix+3] & 0x3f;
10705 if (!xmlIsCharQ(codepoint))
10706 return(-ix);
10707 ix += 4;
10708 } else /* unknown encoding */
10709 return(-ix);
10710 }
10711 return(ix);
10712 }
10713
10714 /**
10715 * xmlParseTryOrFinish:
10716 * @ctxt: an XML parser context
10717 * @terminate: last chunk indicator
10718 *
10719 * Try to progress on parsing
10720 *
10721 * Returns zero if no parsing was possible
10722 */
10723 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)10724 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10725 int ret = 0;
10726 int avail, tlen;
10727 xmlChar cur, next;
10728 const xmlChar *lastlt, *lastgt;
10729
10730 if (ctxt->input == NULL)
10731 return(0);
10732
10733 #ifdef DEBUG_PUSH
10734 switch (ctxt->instate) {
10735 case XML_PARSER_EOF:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try EOF\n"); break;
10738 case XML_PARSER_START:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try START\n"); break;
10741 case XML_PARSER_MISC:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try MISC\n");break;
10744 case XML_PARSER_COMMENT:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try COMMENT\n");break;
10747 case XML_PARSER_PROLOG:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try PROLOG\n");break;
10750 case XML_PARSER_START_TAG:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try START_TAG\n");break;
10753 case XML_PARSER_CONTENT:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try CONTENT\n");break;
10756 case XML_PARSER_CDATA_SECTION:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: try CDATA_SECTION\n");break;
10759 case XML_PARSER_END_TAG:
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: try END_TAG\n");break;
10762 case XML_PARSER_ENTITY_DECL:
10763 xmlGenericError(xmlGenericErrorContext,
10764 "PP: try ENTITY_DECL\n");break;
10765 case XML_PARSER_ENTITY_VALUE:
10766 xmlGenericError(xmlGenericErrorContext,
10767 "PP: try ENTITY_VALUE\n");break;
10768 case XML_PARSER_ATTRIBUTE_VALUE:
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: try ATTRIBUTE_VALUE\n");break;
10771 case XML_PARSER_DTD:
10772 xmlGenericError(xmlGenericErrorContext,
10773 "PP: try DTD\n");break;
10774 case XML_PARSER_EPILOG:
10775 xmlGenericError(xmlGenericErrorContext,
10776 "PP: try EPILOG\n");break;
10777 case XML_PARSER_PI:
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: try PI\n");break;
10780 case XML_PARSER_IGNORE:
10781 xmlGenericError(xmlGenericErrorContext,
10782 "PP: try IGNORE\n");break;
10783 }
10784 #endif
10785
10786 if ((ctxt->input != NULL) &&
10787 (ctxt->input->cur - ctxt->input->base > 4096)) {
10788 xmlSHRINK(ctxt);
10789 ctxt->checkIndex = 0;
10790 }
10791 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10792
10793 while (1) {
10794 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10795 return(0);
10796
10797
10798 /*
10799 * Pop-up of finished entities.
10800 */
10801 while ((RAW == 0) && (ctxt->inputNr > 1))
10802 xmlPopInput(ctxt);
10803
10804 if (ctxt->input == NULL) break;
10805 if (ctxt->input->buf == NULL)
10806 avail = ctxt->input->length -
10807 (ctxt->input->cur - ctxt->input->base);
10808 else {
10809 /*
10810 * If we are operating on converted input, try to flush
10811 * remainng chars to avoid them stalling in the non-converted
10812 * buffer.
10813 */
10814 if ((ctxt->input->buf->raw != NULL) &&
10815 (ctxt->input->buf->raw->use > 0)) {
10816 int base = ctxt->input->base -
10817 ctxt->input->buf->buffer->content;
10818 int current = ctxt->input->cur - ctxt->input->base;
10819
10820 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10821 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10822 ctxt->input->cur = ctxt->input->base + current;
10823 ctxt->input->end =
10824 &ctxt->input->buf->buffer->content[
10825 ctxt->input->buf->buffer->use];
10826 }
10827 avail = ctxt->input->buf->buffer->use -
10828 (ctxt->input->cur - ctxt->input->base);
10829 }
10830 if (avail < 1)
10831 goto done;
10832 switch (ctxt->instate) {
10833 case XML_PARSER_EOF:
10834 /*
10835 * Document parsing is done !
10836 */
10837 goto done;
10838 case XML_PARSER_START:
10839 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10840 xmlChar start[4];
10841 xmlCharEncoding enc;
10842
10843 /*
10844 * Very first chars read from the document flow.
10845 */
10846 if (avail < 4)
10847 goto done;
10848
10849 /*
10850 * Get the 4 first bytes and decode the charset
10851 * if enc != XML_CHAR_ENCODING_NONE
10852 * plug some encoding conversion routines,
10853 * else xmlSwitchEncoding will set to (default)
10854 * UTF8.
10855 */
10856 start[0] = RAW;
10857 start[1] = NXT(1);
10858 start[2] = NXT(2);
10859 start[3] = NXT(3);
10860 enc = xmlDetectCharEncoding(start, 4);
10861 xmlSwitchEncoding(ctxt, enc);
10862 break;
10863 }
10864
10865 if (avail < 2)
10866 goto done;
10867 cur = ctxt->input->cur[0];
10868 next = ctxt->input->cur[1];
10869 if (cur == 0) {
10870 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10871 ctxt->sax->setDocumentLocator(ctxt->userData,
10872 &xmlDefaultSAXLocator);
10873 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10874 ctxt->instate = XML_PARSER_EOF;
10875 #ifdef DEBUG_PUSH
10876 xmlGenericError(xmlGenericErrorContext,
10877 "PP: entering EOF\n");
10878 #endif
10879 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10880 ctxt->sax->endDocument(ctxt->userData);
10881 goto done;
10882 }
10883 if ((cur == '<') && (next == '?')) {
10884 /* PI or XML decl */
10885 if (avail < 5) return(ret);
10886 if ((!terminate) &&
10887 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10888 return(ret);
10889 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10890 ctxt->sax->setDocumentLocator(ctxt->userData,
10891 &xmlDefaultSAXLocator);
10892 if ((ctxt->input->cur[2] == 'x') &&
10893 (ctxt->input->cur[3] == 'm') &&
10894 (ctxt->input->cur[4] == 'l') &&
10895 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10896 ret += 5;
10897 #ifdef DEBUG_PUSH
10898 xmlGenericError(xmlGenericErrorContext,
10899 "PP: Parsing XML Decl\n");
10900 #endif
10901 xmlParseXMLDecl(ctxt);
10902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10903 /*
10904 * The XML REC instructs us to stop parsing right
10905 * here
10906 */
10907 ctxt->instate = XML_PARSER_EOF;
10908 return(0);
10909 }
10910 ctxt->standalone = ctxt->input->standalone;
10911 if ((ctxt->encoding == NULL) &&
10912 (ctxt->input->encoding != NULL))
10913 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10914 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10915 (!ctxt->disableSAX))
10916 ctxt->sax->startDocument(ctxt->userData);
10917 ctxt->instate = XML_PARSER_MISC;
10918 #ifdef DEBUG_PUSH
10919 xmlGenericError(xmlGenericErrorContext,
10920 "PP: entering MISC\n");
10921 #endif
10922 } else {
10923 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10925 (!ctxt->disableSAX))
10926 ctxt->sax->startDocument(ctxt->userData);
10927 ctxt->instate = XML_PARSER_MISC;
10928 #ifdef DEBUG_PUSH
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: entering MISC\n");
10931 #endif
10932 }
10933 } else {
10934 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10935 ctxt->sax->setDocumentLocator(ctxt->userData,
10936 &xmlDefaultSAXLocator);
10937 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10938 if (ctxt->version == NULL) {
10939 xmlErrMemory(ctxt, NULL);
10940 break;
10941 }
10942 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10943 (!ctxt->disableSAX))
10944 ctxt->sax->startDocument(ctxt->userData);
10945 ctxt->instate = XML_PARSER_MISC;
10946 #ifdef DEBUG_PUSH
10947 xmlGenericError(xmlGenericErrorContext,
10948 "PP: entering MISC\n");
10949 #endif
10950 }
10951 break;
10952 case XML_PARSER_START_TAG: {
10953 const xmlChar *name;
10954 const xmlChar *prefix = NULL;
10955 const xmlChar *URI = NULL;
10956 int nsNr = ctxt->nsNr;
10957
10958 if ((avail < 2) && (ctxt->inputNr == 1))
10959 goto done;
10960 cur = ctxt->input->cur[0];
10961 if (cur != '<') {
10962 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10963 ctxt->instate = XML_PARSER_EOF;
10964 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965 ctxt->sax->endDocument(ctxt->userData);
10966 goto done;
10967 }
10968 if (!terminate) {
10969 if (ctxt->progressive) {
10970 /* > can be found unescaped in attribute values */
10971 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10972 goto done;
10973 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10974 goto done;
10975 }
10976 }
10977 if (ctxt->spaceNr == 0)
10978 spacePush(ctxt, -1);
10979 else if (*ctxt->space == -2)
10980 spacePush(ctxt, -1);
10981 else
10982 spacePush(ctxt, *ctxt->space);
10983 #ifdef LIBXML_SAX1_ENABLED
10984 if (ctxt->sax2)
10985 #endif /* LIBXML_SAX1_ENABLED */
10986 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10987 #ifdef LIBXML_SAX1_ENABLED
10988 else
10989 name = xmlParseStartTag(ctxt);
10990 #endif /* LIBXML_SAX1_ENABLED */
10991 if (ctxt->instate == XML_PARSER_EOF)
10992 goto done;
10993 if (name == NULL) {
10994 spacePop(ctxt);
10995 ctxt->instate = XML_PARSER_EOF;
10996 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10997 ctxt->sax->endDocument(ctxt->userData);
10998 goto done;
10999 }
11000 #ifdef LIBXML_VALID_ENABLED
11001 /*
11002 * [ VC: Root Element Type ]
11003 * The Name in the document type declaration must match
11004 * the element type of the root element.
11005 */
11006 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11007 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11008 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11009 #endif /* LIBXML_VALID_ENABLED */
11010
11011 /*
11012 * Check for an Empty Element.
11013 */
11014 if ((RAW == '/') && (NXT(1) == '>')) {
11015 SKIP(2);
11016
11017 if (ctxt->sax2) {
11018 if ((ctxt->sax != NULL) &&
11019 (ctxt->sax->endElementNs != NULL) &&
11020 (!ctxt->disableSAX))
11021 ctxt->sax->endElementNs(ctxt->userData, name,
11022 prefix, URI);
11023 if (ctxt->nsNr - nsNr > 0)
11024 nsPop(ctxt, ctxt->nsNr - nsNr);
11025 #ifdef LIBXML_SAX1_ENABLED
11026 } else {
11027 if ((ctxt->sax != NULL) &&
11028 (ctxt->sax->endElement != NULL) &&
11029 (!ctxt->disableSAX))
11030 ctxt->sax->endElement(ctxt->userData, name);
11031 #endif /* LIBXML_SAX1_ENABLED */
11032 }
11033 spacePop(ctxt);
11034 if (ctxt->nameNr == 0) {
11035 ctxt->instate = XML_PARSER_EPILOG;
11036 } else {
11037 ctxt->instate = XML_PARSER_CONTENT;
11038 }
11039 break;
11040 }
11041 if (RAW == '>') {
11042 NEXT;
11043 } else {
11044 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11045 "Couldn't find end of Start Tag %s\n",
11046 name);
11047 nodePop(ctxt);
11048 spacePop(ctxt);
11049 }
11050 if (ctxt->sax2)
11051 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11052 #ifdef LIBXML_SAX1_ENABLED
11053 else
11054 namePush(ctxt, name);
11055 #endif /* LIBXML_SAX1_ENABLED */
11056
11057 ctxt->instate = XML_PARSER_CONTENT;
11058 break;
11059 }
11060 case XML_PARSER_CONTENT: {
11061 const xmlChar *test;
11062 unsigned int cons;
11063 if ((avail < 2) && (ctxt->inputNr == 1))
11064 goto done;
11065 cur = ctxt->input->cur[0];
11066 next = ctxt->input->cur[1];
11067
11068 test = CUR_PTR;
11069 cons = ctxt->input->consumed;
11070 if ((cur == '<') && (next == '/')) {
11071 ctxt->instate = XML_PARSER_END_TAG;
11072 break;
11073 } else if ((cur == '<') && (next == '?')) {
11074 if ((!terminate) &&
11075 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11076 goto done;
11077 xmlParsePI(ctxt);
11078 } else if ((cur == '<') && (next != '!')) {
11079 ctxt->instate = XML_PARSER_START_TAG;
11080 break;
11081 } else if ((cur == '<') && (next == '!') &&
11082 (ctxt->input->cur[2] == '-') &&
11083 (ctxt->input->cur[3] == '-')) {
11084 int term;
11085
11086 if (avail < 4)
11087 goto done;
11088 ctxt->input->cur += 4;
11089 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11090 ctxt->input->cur -= 4;
11091 if ((!terminate) && (term < 0))
11092 goto done;
11093 xmlParseComment(ctxt);
11094 ctxt->instate = XML_PARSER_CONTENT;
11095 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11096 (ctxt->input->cur[2] == '[') &&
11097 (ctxt->input->cur[3] == 'C') &&
11098 (ctxt->input->cur[4] == 'D') &&
11099 (ctxt->input->cur[5] == 'A') &&
11100 (ctxt->input->cur[6] == 'T') &&
11101 (ctxt->input->cur[7] == 'A') &&
11102 (ctxt->input->cur[8] == '[')) {
11103 SKIP(9);
11104 ctxt->instate = XML_PARSER_CDATA_SECTION;
11105 break;
11106 } else if ((cur == '<') && (next == '!') &&
11107 (avail < 9)) {
11108 goto done;
11109 } else if (cur == '&') {
11110 if ((!terminate) &&
11111 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11112 goto done;
11113 xmlParseReference(ctxt);
11114 } else {
11115 /* TODO Avoid the extra copy, handle directly !!! */
11116 /*
11117 * Goal of the following test is:
11118 * - minimize calls to the SAX 'character' callback
11119 * when they are mergeable
11120 * - handle an problem for isBlank when we only parse
11121 * a sequence of blank chars and the next one is
11122 * not available to check against '<' presence.
11123 * - tries to homogenize the differences in SAX
11124 * callbacks between the push and pull versions
11125 * of the parser.
11126 */
11127 if ((ctxt->inputNr == 1) &&
11128 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11129 if (!terminate) {
11130 if (ctxt->progressive) {
11131 if ((lastlt == NULL) ||
11132 (ctxt->input->cur > lastlt))
11133 goto done;
11134 } else if (xmlParseLookupSequence(ctxt,
11135 '<', 0, 0) < 0) {
11136 goto done;
11137 }
11138 }
11139 }
11140 ctxt->checkIndex = 0;
11141 xmlParseCharData(ctxt, 0);
11142 }
11143 /*
11144 * Pop-up of finished entities.
11145 */
11146 while ((RAW == 0) && (ctxt->inputNr > 1))
11147 xmlPopInput(ctxt);
11148 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11149 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11150 "detected an error in element content\n");
11151 ctxt->instate = XML_PARSER_EOF;
11152 break;
11153 }
11154 break;
11155 }
11156 case XML_PARSER_END_TAG:
11157 if (avail < 2)
11158 goto done;
11159 if (!terminate) {
11160 if (ctxt->progressive) {
11161 /* > can be found unescaped in attribute values */
11162 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11163 goto done;
11164 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11165 goto done;
11166 }
11167 }
11168 if (ctxt->sax2) {
11169 xmlParseEndTag2(ctxt,
11170 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11171 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11172 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11173 nameNsPop(ctxt);
11174 }
11175 #ifdef LIBXML_SAX1_ENABLED
11176 else
11177 xmlParseEndTag1(ctxt, 0);
11178 #endif /* LIBXML_SAX1_ENABLED */
11179 if (ctxt->instate == XML_PARSER_EOF) {
11180 /* Nothing */
11181 } else if (ctxt->nameNr == 0) {
11182 ctxt->instate = XML_PARSER_EPILOG;
11183 } else {
11184 ctxt->instate = XML_PARSER_CONTENT;
11185 }
11186 break;
11187 case XML_PARSER_CDATA_SECTION: {
11188 /*
11189 * The Push mode need to have the SAX callback for
11190 * cdataBlock merge back contiguous callbacks.
11191 */
11192 int base;
11193
11194 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11195 if (base < 0) {
11196 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11197 int tmp;
11198
11199 tmp = xmlCheckCdataPush(ctxt->input->cur,
11200 XML_PARSER_BIG_BUFFER_SIZE);
11201 if (tmp < 0) {
11202 tmp = -tmp;
11203 ctxt->input->cur += tmp;
11204 goto encoding_error;
11205 }
11206 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11207 if (ctxt->sax->cdataBlock != NULL)
11208 ctxt->sax->cdataBlock(ctxt->userData,
11209 ctxt->input->cur, tmp);
11210 else if (ctxt->sax->characters != NULL)
11211 ctxt->sax->characters(ctxt->userData,
11212 ctxt->input->cur, tmp);
11213 }
11214 SKIPL(tmp);
11215 ctxt->checkIndex = 0;
11216 }
11217 goto done;
11218 } else {
11219 int tmp;
11220
11221 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11222 if ((tmp < 0) || (tmp != base)) {
11223 tmp = -tmp;
11224 ctxt->input->cur += tmp;
11225 goto encoding_error;
11226 }
11227 if ((ctxt->sax != NULL) && (base == 0) &&
11228 (ctxt->sax->cdataBlock != NULL) &&
11229 (!ctxt->disableSAX)) {
11230 /*
11231 * Special case to provide identical behaviour
11232 * between pull and push parsers on enpty CDATA
11233 * sections
11234 */
11235 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11236 (!strncmp((const char *)&ctxt->input->cur[-9],
11237 "<![CDATA[", 9)))
11238 ctxt->sax->cdataBlock(ctxt->userData,
11239 BAD_CAST "", 0);
11240 } else if ((ctxt->sax != NULL) && (base > 0) &&
11241 (!ctxt->disableSAX)) {
11242 if (ctxt->sax->cdataBlock != NULL)
11243 ctxt->sax->cdataBlock(ctxt->userData,
11244 ctxt->input->cur, base);
11245 else if (ctxt->sax->characters != NULL)
11246 ctxt->sax->characters(ctxt->userData,
11247 ctxt->input->cur, base);
11248 }
11249 SKIPL(base + 3);
11250 ctxt->checkIndex = 0;
11251 ctxt->instate = XML_PARSER_CONTENT;
11252 #ifdef DEBUG_PUSH
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: entering CONTENT\n");
11255 #endif
11256 }
11257 break;
11258 }
11259 case XML_PARSER_MISC:
11260 SKIP_BLANKS;
11261 if (ctxt->input->buf == NULL)
11262 avail = ctxt->input->length -
11263 (ctxt->input->cur - ctxt->input->base);
11264 else
11265 avail = ctxt->input->buf->buffer->use -
11266 (ctxt->input->cur - ctxt->input->base);
11267 if (avail < 2)
11268 goto done;
11269 cur = ctxt->input->cur[0];
11270 next = ctxt->input->cur[1];
11271 if ((cur == '<') && (next == '?')) {
11272 if ((!terminate) &&
11273 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11274 goto done;
11275 #ifdef DEBUG_PUSH
11276 xmlGenericError(xmlGenericErrorContext,
11277 "PP: Parsing PI\n");
11278 #endif
11279 xmlParsePI(ctxt);
11280 ctxt->checkIndex = 0;
11281 } else if ((cur == '<') && (next == '!') &&
11282 (ctxt->input->cur[2] == '-') &&
11283 (ctxt->input->cur[3] == '-')) {
11284 if ((!terminate) &&
11285 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11286 goto done;
11287 #ifdef DEBUG_PUSH
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: Parsing Comment\n");
11290 #endif
11291 xmlParseComment(ctxt);
11292 ctxt->instate = XML_PARSER_MISC;
11293 ctxt->checkIndex = 0;
11294 } else if ((cur == '<') && (next == '!') &&
11295 (ctxt->input->cur[2] == 'D') &&
11296 (ctxt->input->cur[3] == 'O') &&
11297 (ctxt->input->cur[4] == 'C') &&
11298 (ctxt->input->cur[5] == 'T') &&
11299 (ctxt->input->cur[6] == 'Y') &&
11300 (ctxt->input->cur[7] == 'P') &&
11301 (ctxt->input->cur[8] == 'E')) {
11302 if ((!terminate) &&
11303 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11304 goto done;
11305 #ifdef DEBUG_PUSH
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: Parsing internal subset\n");
11308 #endif
11309 ctxt->inSubset = 1;
11310 xmlParseDocTypeDecl(ctxt);
11311 if (RAW == '[') {
11312 ctxt->instate = XML_PARSER_DTD;
11313 #ifdef DEBUG_PUSH
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: entering DTD\n");
11316 #endif
11317 } else {
11318 /*
11319 * Create and update the external subset.
11320 */
11321 ctxt->inSubset = 2;
11322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11323 (ctxt->sax->externalSubset != NULL))
11324 ctxt->sax->externalSubset(ctxt->userData,
11325 ctxt->intSubName, ctxt->extSubSystem,
11326 ctxt->extSubURI);
11327 ctxt->inSubset = 0;
11328 xmlCleanSpecialAttr(ctxt);
11329 ctxt->instate = XML_PARSER_PROLOG;
11330 #ifdef DEBUG_PUSH
11331 xmlGenericError(xmlGenericErrorContext,
11332 "PP: entering PROLOG\n");
11333 #endif
11334 }
11335 } else if ((cur == '<') && (next == '!') &&
11336 (avail < 9)) {
11337 goto done;
11338 } else {
11339 ctxt->instate = XML_PARSER_START_TAG;
11340 ctxt->progressive = 1;
11341 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11342 #ifdef DEBUG_PUSH
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: entering START_TAG\n");
11345 #endif
11346 }
11347 break;
11348 case XML_PARSER_PROLOG:
11349 SKIP_BLANKS;
11350 if (ctxt->input->buf == NULL)
11351 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11352 else
11353 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11354 if (avail < 2)
11355 goto done;
11356 cur = ctxt->input->cur[0];
11357 next = ctxt->input->cur[1];
11358 if ((cur == '<') && (next == '?')) {
11359 if ((!terminate) &&
11360 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11361 goto done;
11362 #ifdef DEBUG_PUSH
11363 xmlGenericError(xmlGenericErrorContext,
11364 "PP: Parsing PI\n");
11365 #endif
11366 xmlParsePI(ctxt);
11367 } else if ((cur == '<') && (next == '!') &&
11368 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11369 if ((!terminate) &&
11370 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11371 goto done;
11372 #ifdef DEBUG_PUSH
11373 xmlGenericError(xmlGenericErrorContext,
11374 "PP: Parsing Comment\n");
11375 #endif
11376 xmlParseComment(ctxt);
11377 ctxt->instate = XML_PARSER_PROLOG;
11378 } else if ((cur == '<') && (next == '!') &&
11379 (avail < 4)) {
11380 goto done;
11381 } else {
11382 ctxt->instate = XML_PARSER_START_TAG;
11383 if (ctxt->progressive == 0)
11384 ctxt->progressive = 1;
11385 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11386 #ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: entering START_TAG\n");
11389 #endif
11390 }
11391 break;
11392 case XML_PARSER_EPILOG:
11393 SKIP_BLANKS;
11394 if (ctxt->input->buf == NULL)
11395 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11396 else
11397 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11398 if (avail < 2)
11399 goto done;
11400 cur = ctxt->input->cur[0];
11401 next = ctxt->input->cur[1];
11402 if ((cur == '<') && (next == '?')) {
11403 if ((!terminate) &&
11404 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11405 goto done;
11406 #ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: Parsing PI\n");
11409 #endif
11410 xmlParsePI(ctxt);
11411 ctxt->instate = XML_PARSER_EPILOG;
11412 } else if ((cur == '<') && (next == '!') &&
11413 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11414 if ((!terminate) &&
11415 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11416 goto done;
11417 #ifdef DEBUG_PUSH
11418 xmlGenericError(xmlGenericErrorContext,
11419 "PP: Parsing Comment\n");
11420 #endif
11421 xmlParseComment(ctxt);
11422 ctxt->instate = XML_PARSER_EPILOG;
11423 } else if ((cur == '<') && (next == '!') &&
11424 (avail < 4)) {
11425 goto done;
11426 } else {
11427 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11428 ctxt->instate = XML_PARSER_EOF;
11429 #ifdef DEBUG_PUSH
11430 xmlGenericError(xmlGenericErrorContext,
11431 "PP: entering EOF\n");
11432 #endif
11433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11434 ctxt->sax->endDocument(ctxt->userData);
11435 goto done;
11436 }
11437 break;
11438 case XML_PARSER_DTD: {
11439 /*
11440 * Sorry but progressive parsing of the internal subset
11441 * is not expected to be supported. We first check that
11442 * the full content of the internal subset is available and
11443 * the parsing is launched only at that point.
11444 * Internal subset ends up with "']' S? '>'" in an unescaped
11445 * section and not in a ']]>' sequence which are conditional
11446 * sections (whoever argued to keep that crap in XML deserve
11447 * a place in hell !).
11448 */
11449 int base, i;
11450 xmlChar *buf;
11451 xmlChar quote = 0;
11452
11453 base = ctxt->input->cur - ctxt->input->base;
11454 if (base < 0) return(0);
11455 if (ctxt->checkIndex > base)
11456 base = ctxt->checkIndex;
11457 buf = ctxt->input->buf->buffer->content;
11458 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11459 base++) {
11460 if (quote != 0) {
11461 if (buf[base] == quote)
11462 quote = 0;
11463 continue;
11464 }
11465 if ((quote == 0) && (buf[base] == '<')) {
11466 int found = 0;
11467 /* special handling of comments */
11468 if (((unsigned int) base + 4 <
11469 ctxt->input->buf->buffer->use) &&
11470 (buf[base + 1] == '!') &&
11471 (buf[base + 2] == '-') &&
11472 (buf[base + 3] == '-')) {
11473 for (;(unsigned int) base + 3 <
11474 ctxt->input->buf->buffer->use; base++) {
11475 if ((buf[base] == '-') &&
11476 (buf[base + 1] == '-') &&
11477 (buf[base + 2] == '>')) {
11478 found = 1;
11479 base += 2;
11480 break;
11481 }
11482 }
11483 if (!found) {
11484 #if 0
11485 fprintf(stderr, "unfinished comment\n");
11486 #endif
11487 break; /* for */
11488 }
11489 continue;
11490 }
11491 }
11492 if (buf[base] == '"') {
11493 quote = '"';
11494 continue;
11495 }
11496 if (buf[base] == '\'') {
11497 quote = '\'';
11498 continue;
11499 }
11500 if (buf[base] == ']') {
11501 #if 0
11502 fprintf(stderr, "%c%c%c%c: ", buf[base],
11503 buf[base + 1], buf[base + 2], buf[base + 3]);
11504 #endif
11505 if ((unsigned int) base +1 >=
11506 ctxt->input->buf->buffer->use)
11507 break;
11508 if (buf[base + 1] == ']') {
11509 /* conditional crap, skip both ']' ! */
11510 base++;
11511 continue;
11512 }
11513 for (i = 1;
11514 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11515 i++) {
11516 if (buf[base + i] == '>') {
11517 #if 0
11518 fprintf(stderr, "found\n");
11519 #endif
11520 goto found_end_int_subset;
11521 }
11522 if (!IS_BLANK_CH(buf[base + i])) {
11523 #if 0
11524 fprintf(stderr, "not found\n");
11525 #endif
11526 goto not_end_of_int_subset;
11527 }
11528 }
11529 #if 0
11530 fprintf(stderr, "end of stream\n");
11531 #endif
11532 break;
11533
11534 }
11535 not_end_of_int_subset:
11536 continue; /* for */
11537 }
11538 /*
11539 * We didn't found the end of the Internal subset
11540 */
11541 #ifdef DEBUG_PUSH
11542 if (next == 0)
11543 xmlGenericError(xmlGenericErrorContext,
11544 "PP: lookup of int subset end filed\n");
11545 #endif
11546 goto done;
11547
11548 found_end_int_subset:
11549 xmlParseInternalSubset(ctxt);
11550 ctxt->inSubset = 2;
11551 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11552 (ctxt->sax->externalSubset != NULL))
11553 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11554 ctxt->extSubSystem, ctxt->extSubURI);
11555 ctxt->inSubset = 0;
11556 xmlCleanSpecialAttr(ctxt);
11557 ctxt->instate = XML_PARSER_PROLOG;
11558 ctxt->checkIndex = 0;
11559 #ifdef DEBUG_PUSH
11560 xmlGenericError(xmlGenericErrorContext,
11561 "PP: entering PROLOG\n");
11562 #endif
11563 break;
11564 }
11565 case XML_PARSER_COMMENT:
11566 xmlGenericError(xmlGenericErrorContext,
11567 "PP: internal error, state == COMMENT\n");
11568 ctxt->instate = XML_PARSER_CONTENT;
11569 #ifdef DEBUG_PUSH
11570 xmlGenericError(xmlGenericErrorContext,
11571 "PP: entering CONTENT\n");
11572 #endif
11573 break;
11574 case XML_PARSER_IGNORE:
11575 xmlGenericError(xmlGenericErrorContext,
11576 "PP: internal error, state == IGNORE");
11577 ctxt->instate = XML_PARSER_DTD;
11578 #ifdef DEBUG_PUSH
11579 xmlGenericError(xmlGenericErrorContext,
11580 "PP: entering DTD\n");
11581 #endif
11582 break;
11583 case XML_PARSER_PI:
11584 xmlGenericError(xmlGenericErrorContext,
11585 "PP: internal error, state == PI\n");
11586 ctxt->instate = XML_PARSER_CONTENT;
11587 #ifdef DEBUG_PUSH
11588 xmlGenericError(xmlGenericErrorContext,
11589 "PP: entering CONTENT\n");
11590 #endif
11591 break;
11592 case XML_PARSER_ENTITY_DECL:
11593 xmlGenericError(xmlGenericErrorContext,
11594 "PP: internal error, state == ENTITY_DECL\n");
11595 ctxt->instate = XML_PARSER_DTD;
11596 #ifdef DEBUG_PUSH
11597 xmlGenericError(xmlGenericErrorContext,
11598 "PP: entering DTD\n");
11599 #endif
11600 break;
11601 case XML_PARSER_ENTITY_VALUE:
11602 xmlGenericError(xmlGenericErrorContext,
11603 "PP: internal error, state == ENTITY_VALUE\n");
11604 ctxt->instate = XML_PARSER_CONTENT;
11605 #ifdef DEBUG_PUSH
11606 xmlGenericError(xmlGenericErrorContext,
11607 "PP: entering DTD\n");
11608 #endif
11609 break;
11610 case XML_PARSER_ATTRIBUTE_VALUE:
11611 xmlGenericError(xmlGenericErrorContext,
11612 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11613 ctxt->instate = XML_PARSER_START_TAG;
11614 #ifdef DEBUG_PUSH
11615 xmlGenericError(xmlGenericErrorContext,
11616 "PP: entering START_TAG\n");
11617 #endif
11618 break;
11619 case XML_PARSER_SYSTEM_LITERAL:
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: internal error, state == SYSTEM_LITERAL\n");
11622 ctxt->instate = XML_PARSER_START_TAG;
11623 #ifdef DEBUG_PUSH
11624 xmlGenericError(xmlGenericErrorContext,
11625 "PP: entering START_TAG\n");
11626 #endif
11627 break;
11628 case XML_PARSER_PUBLIC_LITERAL:
11629 xmlGenericError(xmlGenericErrorContext,
11630 "PP: internal error, state == PUBLIC_LITERAL\n");
11631 ctxt->instate = XML_PARSER_START_TAG;
11632 #ifdef DEBUG_PUSH
11633 xmlGenericError(xmlGenericErrorContext,
11634 "PP: entering START_TAG\n");
11635 #endif
11636 break;
11637 }
11638 }
11639 done:
11640 #ifdef DEBUG_PUSH
11641 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11642 #endif
11643 return(ret);
11644 encoding_error:
11645 {
11646 char buffer[150];
11647
11648 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11649 ctxt->input->cur[0], ctxt->input->cur[1],
11650 ctxt->input->cur[2], ctxt->input->cur[3]);
11651 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11652 "Input is not proper UTF-8, indicate encoding !\n%s",
11653 BAD_CAST buffer, NULL);
11654 }
11655 return(0);
11656 }
11657
11658 /**
11659 * xmlParseChunk:
11660 * @ctxt: an XML parser context
11661 * @chunk: an char array
11662 * @size: the size in byte of the chunk
11663 * @terminate: last chunk indicator
11664 *
11665 * Parse a Chunk of memory
11666 *
11667 * Returns zero if no error, the xmlParserErrors otherwise.
11668 */
11669 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11670 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11671 int terminate) {
11672 int end_in_lf = 0;
11673 int remain = 0;
11674
11675 if (ctxt == NULL)
11676 return(XML_ERR_INTERNAL_ERROR);
11677 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11678 return(ctxt->errNo);
11679 if (ctxt->instate == XML_PARSER_START)
11680 xmlDetectSAX2(ctxt);
11681 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11682 (chunk[size - 1] == '\r')) {
11683 end_in_lf = 1;
11684 size--;
11685 }
11686
11687 xmldecl_done:
11688
11689 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11690 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11691 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11692 int cur = ctxt->input->cur - ctxt->input->base;
11693 int res;
11694
11695 /*
11696 * Specific handling if we autodetected an encoding, we should not
11697 * push more than the first line ... which depend on the encoding
11698 * And only push the rest once the final encoding was detected
11699 */
11700 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11701 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11702 unsigned int len = 45;
11703
11704 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11705 BAD_CAST "UTF-16")) ||
11706 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11707 BAD_CAST "UTF16")))
11708 len = 90;
11709 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11710 BAD_CAST "UCS-4")) ||
11711 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11712 BAD_CAST "UCS4")))
11713 len = 180;
11714
11715 if (ctxt->input->buf->rawconsumed < len)
11716 len -= ctxt->input->buf->rawconsumed;
11717
11718 /*
11719 * Change size for reading the initial declaration only
11720 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11721 * will blindly copy extra bytes from memory.
11722 */
11723 if ((unsigned int) size > len) {
11724 remain = size - len;
11725 size = len;
11726 } else {
11727 remain = 0;
11728 }
11729 }
11730 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11731 if (res < 0) {
11732 ctxt->errNo = XML_PARSER_EOF;
11733 ctxt->disableSAX = 1;
11734 return (XML_PARSER_EOF);
11735 }
11736 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11737 ctxt->input->cur = ctxt->input->base + cur;
11738 ctxt->input->end =
11739 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11740 #ifdef DEBUG_PUSH
11741 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11742 #endif
11743
11744 } else if (ctxt->instate != XML_PARSER_EOF) {
11745 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11746 xmlParserInputBufferPtr in = ctxt->input->buf;
11747 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11748 (in->raw != NULL)) {
11749 int nbchars;
11750
11751 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11752 if (nbchars < 0) {
11753 /* TODO 2.6.0 */
11754 xmlGenericError(xmlGenericErrorContext,
11755 "xmlParseChunk: encoder error\n");
11756 return(XML_ERR_INVALID_ENCODING);
11757 }
11758 }
11759 }
11760 }
11761 if (remain != 0)
11762 xmlParseTryOrFinish(ctxt, 0);
11763 else
11764 xmlParseTryOrFinish(ctxt, terminate);
11765 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11766 return(ctxt->errNo);
11767
11768 if (remain != 0) {
11769 chunk += size;
11770 size = remain;
11771 remain = 0;
11772 goto xmldecl_done;
11773 }
11774 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11775 (ctxt->input->buf != NULL)) {
11776 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11777 }
11778 if (terminate) {
11779 /*
11780 * Check for termination
11781 */
11782 int avail = 0;
11783
11784 if (ctxt->input != NULL) {
11785 if (ctxt->input->buf == NULL)
11786 avail = ctxt->input->length -
11787 (ctxt->input->cur - ctxt->input->base);
11788 else
11789 avail = ctxt->input->buf->buffer->use -
11790 (ctxt->input->cur - ctxt->input->base);
11791 }
11792
11793 if ((ctxt->instate != XML_PARSER_EOF) &&
11794 (ctxt->instate != XML_PARSER_EPILOG)) {
11795 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11796 }
11797 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
11798 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11799 }
11800 if (ctxt->instate != XML_PARSER_EOF) {
11801 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11802 ctxt->sax->endDocument(ctxt->userData);
11803 }
11804 ctxt->instate = XML_PARSER_EOF;
11805 }
11806 return((xmlParserErrors) ctxt->errNo);
11807 }
11808
11809 /************************************************************************
11810 * *
11811 * I/O front end functions to the parser *
11812 * *
11813 ************************************************************************/
11814
11815 /**
11816 * xmlCreatePushParserCtxt:
11817 * @sax: a SAX handler
11818 * @user_data: The user data returned on SAX callbacks
11819 * @chunk: a pointer to an array of chars
11820 * @size: number of chars in the array
11821 * @filename: an optional file name or URI
11822 *
11823 * Create a parser context for using the XML parser in push mode.
11824 * If @buffer and @size are non-NULL, the data is used to detect
11825 * the encoding. The remaining characters will be parsed so they
11826 * don't need to be fed in again through xmlParseChunk.
11827 * To allow content encoding detection, @size should be >= 4
11828 * The value of @filename is used for fetching external entities
11829 * and error/warning reports.
11830 *
11831 * Returns the new parser context or NULL
11832 */
11833
11834 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11835 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11836 const char *chunk, int size, const char *filename) {
11837 xmlParserCtxtPtr ctxt;
11838 xmlParserInputPtr inputStream;
11839 xmlParserInputBufferPtr buf;
11840 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11841
11842 /*
11843 * plug some encoding conversion routines
11844 */
11845 if ((chunk != NULL) && (size >= 4))
11846 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11847
11848 buf = xmlAllocParserInputBuffer(enc);
11849 if (buf == NULL) return(NULL);
11850
11851 ctxt = xmlNewParserCtxt();
11852 if (ctxt == NULL) {
11853 xmlErrMemory(NULL, "creating parser: out of memory\n");
11854 xmlFreeParserInputBuffer(buf);
11855 return(NULL);
11856 }
11857 ctxt->dictNames = 1;
11858 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11859 if (ctxt->pushTab == NULL) {
11860 xmlErrMemory(ctxt, NULL);
11861 xmlFreeParserInputBuffer(buf);
11862 xmlFreeParserCtxt(ctxt);
11863 return(NULL);
11864 }
11865 if (sax != NULL) {
11866 #ifdef LIBXML_SAX1_ENABLED
11867 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11868 #endif /* LIBXML_SAX1_ENABLED */
11869 xmlFree(ctxt->sax);
11870 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11871 if (ctxt->sax == NULL) {
11872 xmlErrMemory(ctxt, NULL);
11873 xmlFreeParserInputBuffer(buf);
11874 xmlFreeParserCtxt(ctxt);
11875 return(NULL);
11876 }
11877 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11878 if (sax->initialized == XML_SAX2_MAGIC)
11879 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11880 else
11881 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11882 if (user_data != NULL)
11883 ctxt->userData = user_data;
11884 }
11885 if (filename == NULL) {
11886 ctxt->directory = NULL;
11887 } else {
11888 ctxt->directory = xmlParserGetDirectory(filename);
11889 }
11890
11891 inputStream = xmlNewInputStream(ctxt);
11892 if (inputStream == NULL) {
11893 xmlFreeParserCtxt(ctxt);
11894 xmlFreeParserInputBuffer(buf);
11895 return(NULL);
11896 }
11897
11898 if (filename == NULL)
11899 inputStream->filename = NULL;
11900 else {
11901 inputStream->filename = (char *)
11902 xmlCanonicPath((const xmlChar *) filename);
11903 if (inputStream->filename == NULL) {
11904 xmlFreeParserCtxt(ctxt);
11905 xmlFreeParserInputBuffer(buf);
11906 return(NULL);
11907 }
11908 }
11909 inputStream->buf = buf;
11910 inputStream->base = inputStream->buf->buffer->content;
11911 inputStream->cur = inputStream->buf->buffer->content;
11912 inputStream->end =
11913 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
11914
11915 inputPush(ctxt, inputStream);
11916
11917 /*
11918 * If the caller didn't provide an initial 'chunk' for determining
11919 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11920 * that it can be automatically determined later
11921 */
11922 if ((size == 0) || (chunk == NULL)) {
11923 ctxt->charset = XML_CHAR_ENCODING_NONE;
11924 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
11925 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11926 int cur = ctxt->input->cur - ctxt->input->base;
11927
11928 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11929
11930 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11931 ctxt->input->cur = ctxt->input->base + cur;
11932 ctxt->input->end =
11933 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
11934 #ifdef DEBUG_PUSH
11935 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11936 #endif
11937 }
11938
11939 if (enc != XML_CHAR_ENCODING_NONE) {
11940 xmlSwitchEncoding(ctxt, enc);
11941 }
11942
11943 return(ctxt);
11944 }
11945 #endif /* LIBXML_PUSH_ENABLED */
11946
11947 /**
11948 * xmlStopParser:
11949 * @ctxt: an XML parser context
11950 *
11951 * Blocks further parser processing
11952 */
11953 void
xmlStopParser(xmlParserCtxtPtr ctxt)11954 xmlStopParser(xmlParserCtxtPtr ctxt) {
11955 if (ctxt == NULL)
11956 return;
11957 ctxt->instate = XML_PARSER_EOF;
11958 ctxt->disableSAX = 1;
11959 if (ctxt->input != NULL) {
11960 ctxt->input->cur = BAD_CAST"";
11961 ctxt->input->base = ctxt->input->cur;
11962 }
11963 }
11964
11965 /**
11966 * xmlCreateIOParserCtxt:
11967 * @sax: a SAX handler
11968 * @user_data: The user data returned on SAX callbacks
11969 * @ioread: an I/O read function
11970 * @ioclose: an I/O close function
11971 * @ioctx: an I/O handler
11972 * @enc: the charset encoding if known
11973 *
11974 * Create a parser context for using the XML parser with an existing
11975 * I/O stream
11976 *
11977 * Returns the new parser context or NULL
11978 */
11979 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11980 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11981 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11982 void *ioctx, xmlCharEncoding enc) {
11983 xmlParserCtxtPtr ctxt;
11984 xmlParserInputPtr inputStream;
11985 xmlParserInputBufferPtr buf;
11986
11987 if (ioread == NULL) return(NULL);
11988
11989 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11990 if (buf == NULL) return(NULL);
11991
11992 ctxt = xmlNewParserCtxt();
11993 if (ctxt == NULL) {
11994 xmlFreeParserInputBuffer(buf);
11995 return(NULL);
11996 }
11997 if (sax != NULL) {
11998 #ifdef LIBXML_SAX1_ENABLED
11999 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12000 #endif /* LIBXML_SAX1_ENABLED */
12001 xmlFree(ctxt->sax);
12002 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12003 if (ctxt->sax == NULL) {
12004 xmlErrMemory(ctxt, NULL);
12005 xmlFreeParserCtxt(ctxt);
12006 return(NULL);
12007 }
12008 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12009 if (sax->initialized == XML_SAX2_MAGIC)
12010 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12011 else
12012 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12013 if (user_data != NULL)
12014 ctxt->userData = user_data;
12015 }
12016
12017 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12018 if (inputStream == NULL) {
12019 xmlFreeParserCtxt(ctxt);
12020 return(NULL);
12021 }
12022 inputPush(ctxt, inputStream);
12023
12024 return(ctxt);
12025 }
12026
12027 #ifdef LIBXML_VALID_ENABLED
12028 /************************************************************************
12029 * *
12030 * Front ends when parsing a DTD *
12031 * *
12032 ************************************************************************/
12033
12034 /**
12035 * xmlIOParseDTD:
12036 * @sax: the SAX handler block or NULL
12037 * @input: an Input Buffer
12038 * @enc: the charset encoding if known
12039 *
12040 * Load and parse a DTD
12041 *
12042 * Returns the resulting xmlDtdPtr or NULL in case of error.
12043 * @input will be freed by the function in any case.
12044 */
12045
12046 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)12047 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12048 xmlCharEncoding enc) {
12049 xmlDtdPtr ret = NULL;
12050 xmlParserCtxtPtr ctxt;
12051 xmlParserInputPtr pinput = NULL;
12052 xmlChar start[4];
12053
12054 if (input == NULL)
12055 return(NULL);
12056
12057 ctxt = xmlNewParserCtxt();
12058 if (ctxt == NULL) {
12059 xmlFreeParserInputBuffer(input);
12060 return(NULL);
12061 }
12062
12063 /*
12064 * Set-up the SAX context
12065 */
12066 if (sax != NULL) {
12067 if (ctxt->sax != NULL)
12068 xmlFree(ctxt->sax);
12069 ctxt->sax = sax;
12070 ctxt->userData = ctxt;
12071 }
12072 xmlDetectSAX2(ctxt);
12073
12074 /*
12075 * generate a parser input from the I/O handler
12076 */
12077
12078 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12079 if (pinput == NULL) {
12080 if (sax != NULL) ctxt->sax = NULL;
12081 xmlFreeParserInputBuffer(input);
12082 xmlFreeParserCtxt(ctxt);
12083 return(NULL);
12084 }
12085
12086 /*
12087 * plug some encoding conversion routines here.
12088 */
12089 if (xmlPushInput(ctxt, pinput) < 0) {
12090 if (sax != NULL) ctxt->sax = NULL;
12091 xmlFreeParserCtxt(ctxt);
12092 return(NULL);
12093 }
12094 if (enc != XML_CHAR_ENCODING_NONE) {
12095 xmlSwitchEncoding(ctxt, enc);
12096 }
12097
12098 pinput->filename = NULL;
12099 pinput->line = 1;
12100 pinput->col = 1;
12101 pinput->base = ctxt->input->cur;
12102 pinput->cur = ctxt->input->cur;
12103 pinput->free = NULL;
12104
12105 /*
12106 * let's parse that entity knowing it's an external subset.
12107 */
12108 ctxt->inSubset = 2;
12109 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12110 if (ctxt->myDoc == NULL) {
12111 xmlErrMemory(ctxt, "New Doc failed");
12112 return(NULL);
12113 }
12114 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12115 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12116 BAD_CAST "none", BAD_CAST "none");
12117
12118 if ((enc == XML_CHAR_ENCODING_NONE) &&
12119 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12120 /*
12121 * Get the 4 first bytes and decode the charset
12122 * if enc != XML_CHAR_ENCODING_NONE
12123 * plug some encoding conversion routines.
12124 */
12125 start[0] = RAW;
12126 start[1] = NXT(1);
12127 start[2] = NXT(2);
12128 start[3] = NXT(3);
12129 enc = xmlDetectCharEncoding(start, 4);
12130 if (enc != XML_CHAR_ENCODING_NONE) {
12131 xmlSwitchEncoding(ctxt, enc);
12132 }
12133 }
12134
12135 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12136
12137 if (ctxt->myDoc != NULL) {
12138 if (ctxt->wellFormed) {
12139 ret = ctxt->myDoc->extSubset;
12140 ctxt->myDoc->extSubset = NULL;
12141 if (ret != NULL) {
12142 xmlNodePtr tmp;
12143
12144 ret->doc = NULL;
12145 tmp = ret->children;
12146 while (tmp != NULL) {
12147 tmp->doc = NULL;
12148 tmp = tmp->next;
12149 }
12150 }
12151 } else {
12152 ret = NULL;
12153 }
12154 xmlFreeDoc(ctxt->myDoc);
12155 ctxt->myDoc = NULL;
12156 }
12157 if (sax != NULL) ctxt->sax = NULL;
12158 xmlFreeParserCtxt(ctxt);
12159
12160 return(ret);
12161 }
12162
12163 /**
12164 * xmlSAXParseDTD:
12165 * @sax: the SAX handler block
12166 * @ExternalID: a NAME* containing the External ID of the DTD
12167 * @SystemID: a NAME* containing the URL to the DTD
12168 *
12169 * Load and parse an external subset.
12170 *
12171 * Returns the resulting xmlDtdPtr or NULL in case of error.
12172 */
12173
12174 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)12175 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12176 const xmlChar *SystemID) {
12177 xmlDtdPtr ret = NULL;
12178 xmlParserCtxtPtr ctxt;
12179 xmlParserInputPtr input = NULL;
12180 xmlCharEncoding enc;
12181 xmlChar* systemIdCanonic;
12182
12183 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12184
12185 ctxt = xmlNewParserCtxt();
12186 if (ctxt == NULL) {
12187 return(NULL);
12188 }
12189
12190 /*
12191 * Set-up the SAX context
12192 */
12193 if (sax != NULL) {
12194 if (ctxt->sax != NULL)
12195 xmlFree(ctxt->sax);
12196 ctxt->sax = sax;
12197 ctxt->userData = ctxt;
12198 }
12199
12200 /*
12201 * Canonicalise the system ID
12202 */
12203 systemIdCanonic = xmlCanonicPath(SystemID);
12204 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12205 xmlFreeParserCtxt(ctxt);
12206 return(NULL);
12207 }
12208
12209 /*
12210 * Ask the Entity resolver to load the damn thing
12211 */
12212
12213 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12214 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12215 systemIdCanonic);
12216 if (input == NULL) {
12217 if (sax != NULL) ctxt->sax = NULL;
12218 xmlFreeParserCtxt(ctxt);
12219 if (systemIdCanonic != NULL)
12220 xmlFree(systemIdCanonic);
12221 return(NULL);
12222 }
12223
12224 /*
12225 * plug some encoding conversion routines here.
12226 */
12227 if (xmlPushInput(ctxt, input) < 0) {
12228 if (sax != NULL) ctxt->sax = NULL;
12229 xmlFreeParserCtxt(ctxt);
12230 if (systemIdCanonic != NULL)
12231 xmlFree(systemIdCanonic);
12232 return(NULL);
12233 }
12234 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12235 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12236 xmlSwitchEncoding(ctxt, enc);
12237 }
12238
12239 if (input->filename == NULL)
12240 input->filename = (char *) systemIdCanonic;
12241 else
12242 xmlFree(systemIdCanonic);
12243 input->line = 1;
12244 input->col = 1;
12245 input->base = ctxt->input->cur;
12246 input->cur = ctxt->input->cur;
12247 input->free = NULL;
12248
12249 /*
12250 * let's parse that entity knowing it's an external subset.
12251 */
12252 ctxt->inSubset = 2;
12253 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12254 if (ctxt->myDoc == NULL) {
12255 xmlErrMemory(ctxt, "New Doc failed");
12256 if (sax != NULL) ctxt->sax = NULL;
12257 xmlFreeParserCtxt(ctxt);
12258 return(NULL);
12259 }
12260 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12261 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12262 ExternalID, SystemID);
12263 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12264
12265 if (ctxt->myDoc != NULL) {
12266 if (ctxt->wellFormed) {
12267 ret = ctxt->myDoc->extSubset;
12268 ctxt->myDoc->extSubset = NULL;
12269 if (ret != NULL) {
12270 xmlNodePtr tmp;
12271
12272 ret->doc = NULL;
12273 tmp = ret->children;
12274 while (tmp != NULL) {
12275 tmp->doc = NULL;
12276 tmp = tmp->next;
12277 }
12278 }
12279 } else {
12280 ret = NULL;
12281 }
12282 xmlFreeDoc(ctxt->myDoc);
12283 ctxt->myDoc = NULL;
12284 }
12285 if (sax != NULL) ctxt->sax = NULL;
12286 xmlFreeParserCtxt(ctxt);
12287
12288 return(ret);
12289 }
12290
12291
12292 /**
12293 * xmlParseDTD:
12294 * @ExternalID: a NAME* containing the External ID of the DTD
12295 * @SystemID: a NAME* containing the URL to the DTD
12296 *
12297 * Load and parse an external subset.
12298 *
12299 * Returns the resulting xmlDtdPtr or NULL in case of error.
12300 */
12301
12302 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)12303 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12304 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12305 }
12306 #endif /* LIBXML_VALID_ENABLED */
12307
12308 /************************************************************************
12309 * *
12310 * Front ends when parsing an Entity *
12311 * *
12312 ************************************************************************/
12313
12314 /**
12315 * xmlParseCtxtExternalEntity:
12316 * @ctx: the existing parsing context
12317 * @URL: the URL for the entity to load
12318 * @ID: the System ID for the entity to load
12319 * @lst: the return value for the set of parsed nodes
12320 *
12321 * Parse an external general entity within an existing parsing context
12322 * An external general parsed entity is well-formed if it matches the
12323 * production labeled extParsedEnt.
12324 *
12325 * [78] extParsedEnt ::= TextDecl? content
12326 *
12327 * Returns 0 if the entity is well formed, -1 in case of args problem and
12328 * the parser error code otherwise
12329 */
12330
12331 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12332 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12333 const xmlChar *ID, xmlNodePtr *lst) {
12334 xmlParserCtxtPtr ctxt;
12335 xmlDocPtr newDoc;
12336 xmlNodePtr newRoot;
12337 xmlSAXHandlerPtr oldsax = NULL;
12338 int ret = 0;
12339 xmlChar start[4];
12340 xmlCharEncoding enc;
12341
12342 if (ctx == NULL) return(-1);
12343
12344 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12345 (ctx->depth > 1024)) {
12346 return(XML_ERR_ENTITY_LOOP);
12347 }
12348
12349 if (lst != NULL)
12350 *lst = NULL;
12351 if ((URL == NULL) && (ID == NULL))
12352 return(-1);
12353 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12354 return(-1);
12355
12356 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
12357 if (ctxt == NULL) {
12358 return(-1);
12359 }
12360
12361 oldsax = ctxt->sax;
12362 ctxt->sax = ctx->sax;
12363 xmlDetectSAX2(ctxt);
12364 newDoc = xmlNewDoc(BAD_CAST "1.0");
12365 if (newDoc == NULL) {
12366 xmlFreeParserCtxt(ctxt);
12367 return(-1);
12368 }
12369 newDoc->properties = XML_DOC_INTERNAL;
12370 if (ctx->myDoc->dict) {
12371 newDoc->dict = ctx->myDoc->dict;
12372 xmlDictReference(newDoc->dict);
12373 }
12374 if (ctx->myDoc != NULL) {
12375 newDoc->intSubset = ctx->myDoc->intSubset;
12376 newDoc->extSubset = ctx->myDoc->extSubset;
12377 }
12378 if (ctx->myDoc->URL != NULL) {
12379 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12380 }
12381 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12382 if (newRoot == NULL) {
12383 ctxt->sax = oldsax;
12384 xmlFreeParserCtxt(ctxt);
12385 newDoc->intSubset = NULL;
12386 newDoc->extSubset = NULL;
12387 xmlFreeDoc(newDoc);
12388 return(-1);
12389 }
12390 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12391 nodePush(ctxt, newDoc->children);
12392 if (ctx->myDoc == NULL) {
12393 ctxt->myDoc = newDoc;
12394 } else {
12395 ctxt->myDoc = ctx->myDoc;
12396 newDoc->children->doc = ctx->myDoc;
12397 }
12398
12399 /*
12400 * Get the 4 first bytes and decode the charset
12401 * if enc != XML_CHAR_ENCODING_NONE
12402 * plug some encoding conversion routines.
12403 */
12404 GROW
12405 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12406 start[0] = RAW;
12407 start[1] = NXT(1);
12408 start[2] = NXT(2);
12409 start[3] = NXT(3);
12410 enc = xmlDetectCharEncoding(start, 4);
12411 if (enc != XML_CHAR_ENCODING_NONE) {
12412 xmlSwitchEncoding(ctxt, enc);
12413 }
12414 }
12415
12416 /*
12417 * Parse a possible text declaration first
12418 */
12419 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12420 xmlParseTextDecl(ctxt);
12421 /*
12422 * An XML-1.0 document can't reference an entity not XML-1.0
12423 */
12424 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12425 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12426 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12427 "Version mismatch between document and entity\n");
12428 }
12429 }
12430
12431 /*
12432 * Doing validity checking on chunk doesn't make sense
12433 */
12434 ctxt->instate = XML_PARSER_CONTENT;
12435 ctxt->validate = ctx->validate;
12436 ctxt->valid = ctx->valid;
12437 ctxt->loadsubset = ctx->loadsubset;
12438 ctxt->depth = ctx->depth + 1;
12439 ctxt->replaceEntities = ctx->replaceEntities;
12440 if (ctxt->validate) {
12441 ctxt->vctxt.error = ctx->vctxt.error;
12442 ctxt->vctxt.warning = ctx->vctxt.warning;
12443 } else {
12444 ctxt->vctxt.error = NULL;
12445 ctxt->vctxt.warning = NULL;
12446 }
12447 ctxt->vctxt.nodeTab = NULL;
12448 ctxt->vctxt.nodeNr = 0;
12449 ctxt->vctxt.nodeMax = 0;
12450 ctxt->vctxt.node = NULL;
12451 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12452 ctxt->dict = ctx->dict;
12453 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12454 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12455 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12456 ctxt->dictNames = ctx->dictNames;
12457 ctxt->attsDefault = ctx->attsDefault;
12458 ctxt->attsSpecial = ctx->attsSpecial;
12459 ctxt->linenumbers = ctx->linenumbers;
12460
12461 xmlParseContent(ctxt);
12462
12463 ctx->validate = ctxt->validate;
12464 ctx->valid = ctxt->valid;
12465 if ((RAW == '<') && (NXT(1) == '/')) {
12466 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12467 } else if (RAW != 0) {
12468 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12469 }
12470 if (ctxt->node != newDoc->children) {
12471 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12472 }
12473
12474 if (!ctxt->wellFormed) {
12475 if (ctxt->errNo == 0)
12476 ret = 1;
12477 else
12478 ret = ctxt->errNo;
12479 } else {
12480 if (lst != NULL) {
12481 xmlNodePtr cur;
12482
12483 /*
12484 * Return the newly created nodeset after unlinking it from
12485 * they pseudo parent.
12486 */
12487 cur = newDoc->children->children;
12488 *lst = cur;
12489 while (cur != NULL) {
12490 cur->parent = NULL;
12491 cur = cur->next;
12492 }
12493 newDoc->children->children = NULL;
12494 }
12495 ret = 0;
12496 }
12497 ctxt->sax = oldsax;
12498 ctxt->dict = NULL;
12499 ctxt->attsDefault = NULL;
12500 ctxt->attsSpecial = NULL;
12501 xmlFreeParserCtxt(ctxt);
12502 newDoc->intSubset = NULL;
12503 newDoc->extSubset = NULL;
12504 xmlFreeDoc(newDoc);
12505
12506 return(ret);
12507 }
12508
12509 /**
12510 * xmlParseExternalEntityPrivate:
12511 * @doc: the document the chunk pertains to
12512 * @oldctxt: the previous parser context if available
12513 * @sax: the SAX handler bloc (possibly NULL)
12514 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12515 * @depth: Used for loop detection, use 0
12516 * @URL: the URL for the entity to load
12517 * @ID: the System ID for the entity to load
12518 * @list: the return value for the set of parsed nodes
12519 *
12520 * Private version of xmlParseExternalEntity()
12521 *
12522 * Returns 0 if the entity is well formed, -1 in case of args problem and
12523 * the parser error code otherwise
12524 */
12525
12526 static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc,xmlParserCtxtPtr oldctxt,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12527 xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12528 xmlSAXHandlerPtr sax,
12529 void *user_data, int depth, const xmlChar *URL,
12530 const xmlChar *ID, xmlNodePtr *list) {
12531 xmlParserCtxtPtr ctxt;
12532 xmlDocPtr newDoc;
12533 xmlNodePtr newRoot;
12534 xmlSAXHandlerPtr oldsax = NULL;
12535 xmlParserErrors ret = XML_ERR_OK;
12536 xmlChar start[4];
12537 xmlCharEncoding enc;
12538
12539 if (((depth > 40) &&
12540 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12541 (depth > 1024)) {
12542 return(XML_ERR_ENTITY_LOOP);
12543 }
12544
12545 if (list != NULL)
12546 *list = NULL;
12547 if ((URL == NULL) && (ID == NULL))
12548 return(XML_ERR_INTERNAL_ERROR);
12549 if (doc == NULL)
12550 return(XML_ERR_INTERNAL_ERROR);
12551
12552
12553 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
12554 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12555 ctxt->userData = ctxt;
12556 if (oldctxt != NULL) {
12557 ctxt->_private = oldctxt->_private;
12558 ctxt->loadsubset = oldctxt->loadsubset;
12559 ctxt->validate = oldctxt->validate;
12560 ctxt->external = oldctxt->external;
12561 ctxt->record_info = oldctxt->record_info;
12562 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12563 ctxt->node_seq.length = oldctxt->node_seq.length;
12564 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12565 } else {
12566 /*
12567 * Doing validity checking on chunk without context
12568 * doesn't make sense
12569 */
12570 ctxt->_private = NULL;
12571 ctxt->validate = 0;
12572 ctxt->external = 2;
12573 ctxt->loadsubset = 0;
12574 }
12575 if (sax != NULL) {
12576 oldsax = ctxt->sax;
12577 ctxt->sax = sax;
12578 if (user_data != NULL)
12579 ctxt->userData = user_data;
12580 }
12581 xmlDetectSAX2(ctxt);
12582 newDoc = xmlNewDoc(BAD_CAST "1.0");
12583 if (newDoc == NULL) {
12584 ctxt->node_seq.maximum = 0;
12585 ctxt->node_seq.length = 0;
12586 ctxt->node_seq.buffer = NULL;
12587 xmlFreeParserCtxt(ctxt);
12588 return(XML_ERR_INTERNAL_ERROR);
12589 }
12590 newDoc->properties = XML_DOC_INTERNAL;
12591 newDoc->intSubset = doc->intSubset;
12592 newDoc->extSubset = doc->extSubset;
12593 newDoc->dict = doc->dict;
12594 xmlDictReference(newDoc->dict);
12595
12596 if (doc->URL != NULL) {
12597 newDoc->URL = xmlStrdup(doc->URL);
12598 }
12599 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12600 if (newRoot == NULL) {
12601 if (sax != NULL)
12602 ctxt->sax = oldsax;
12603 ctxt->node_seq.maximum = 0;
12604 ctxt->node_seq.length = 0;
12605 ctxt->node_seq.buffer = NULL;
12606 xmlFreeParserCtxt(ctxt);
12607 newDoc->intSubset = NULL;
12608 newDoc->extSubset = NULL;
12609 xmlFreeDoc(newDoc);
12610 return(XML_ERR_INTERNAL_ERROR);
12611 }
12612 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12613 nodePush(ctxt, newDoc->children);
12614 ctxt->myDoc = doc;
12615 newRoot->doc = doc;
12616
12617 /*
12618 * Get the 4 first bytes and decode the charset
12619 * if enc != XML_CHAR_ENCODING_NONE
12620 * plug some encoding conversion routines.
12621 */
12622 GROW;
12623 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12624 start[0] = RAW;
12625 start[1] = NXT(1);
12626 start[2] = NXT(2);
12627 start[3] = NXT(3);
12628 enc = xmlDetectCharEncoding(start, 4);
12629 if (enc != XML_CHAR_ENCODING_NONE) {
12630 xmlSwitchEncoding(ctxt, enc);
12631 }
12632 }
12633
12634 /*
12635 * Parse a possible text declaration first
12636 */
12637 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12638 xmlParseTextDecl(ctxt);
12639 }
12640
12641 ctxt->instate = XML_PARSER_CONTENT;
12642 ctxt->depth = depth;
12643
12644 xmlParseContent(ctxt);
12645
12646 if ((RAW == '<') && (NXT(1) == '/')) {
12647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12648 } else if (RAW != 0) {
12649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12650 }
12651 if (ctxt->node != newDoc->children) {
12652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12653 }
12654
12655 if (!ctxt->wellFormed) {
12656 if (ctxt->errNo == 0)
12657 ret = XML_ERR_INTERNAL_ERROR;
12658 else
12659 ret = (xmlParserErrors)ctxt->errNo;
12660 } else {
12661 if (list != NULL) {
12662 xmlNodePtr cur;
12663
12664 /*
12665 * Return the newly created nodeset after unlinking it from
12666 * they pseudo parent.
12667 */
12668 cur = newDoc->children->children;
12669 *list = cur;
12670 while (cur != NULL) {
12671 cur->parent = NULL;
12672 cur = cur->next;
12673 }
12674 newDoc->children->children = NULL;
12675 }
12676 ret = XML_ERR_OK;
12677 }
12678
12679 /*
12680 * Record in the parent context the number of entities replacement
12681 * done when parsing that reference.
12682 */
12683 if (oldctxt != NULL)
12684 oldctxt->nbentities += ctxt->nbentities;
12685
12686 /*
12687 * Also record the size of the entity parsed
12688 */
12689 if (ctxt->input != NULL) {
12690 oldctxt->sizeentities += ctxt->input->consumed;
12691 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12692 }
12693 /*
12694 * And record the last error if any
12695 */
12696 if (ctxt->lastError.code != XML_ERR_OK)
12697 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12698
12699 if (sax != NULL)
12700 ctxt->sax = oldsax;
12701 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12702 oldctxt->node_seq.length = ctxt->node_seq.length;
12703 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12704 ctxt->node_seq.maximum = 0;
12705 ctxt->node_seq.length = 0;
12706 ctxt->node_seq.buffer = NULL;
12707 xmlFreeParserCtxt(ctxt);
12708 newDoc->intSubset = NULL;
12709 newDoc->extSubset = NULL;
12710 xmlFreeDoc(newDoc);
12711
12712 return(ret);
12713 }
12714
12715 #ifdef LIBXML_SAX1_ENABLED
12716 /**
12717 * xmlParseExternalEntity:
12718 * @doc: the document the chunk pertains to
12719 * @sax: the SAX handler bloc (possibly NULL)
12720 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12721 * @depth: Used for loop detection, use 0
12722 * @URL: the URL for the entity to load
12723 * @ID: the System ID for the entity to load
12724 * @lst: the return value for the set of parsed nodes
12725 *
12726 * Parse an external general entity
12727 * An external general parsed entity is well-formed if it matches the
12728 * production labeled extParsedEnt.
12729 *
12730 * [78] extParsedEnt ::= TextDecl? content
12731 *
12732 * Returns 0 if the entity is well formed, -1 in case of args problem and
12733 * the parser error code otherwise
12734 */
12735
12736 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * lst)12737 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12738 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12739 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12740 ID, lst));
12741 }
12742
12743 /**
12744 * xmlParseBalancedChunkMemory:
12745 * @doc: the document the chunk pertains to
12746 * @sax: the SAX handler bloc (possibly NULL)
12747 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12748 * @depth: Used for loop detection, use 0
12749 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12750 * @lst: the return value for the set of parsed nodes
12751 *
12752 * Parse a well-balanced chunk of an XML document
12753 * called by the parser
12754 * The allowed sequence for the Well Balanced Chunk is the one defined by
12755 * the content production in the XML grammar:
12756 *
12757 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12758 *
12759 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12760 * the parser error code otherwise
12761 */
12762
12763 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12764 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12765 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12766 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12767 depth, string, lst, 0 );
12768 }
12769 #endif /* LIBXML_SAX1_ENABLED */
12770
12771 /**
12772 * xmlParseBalancedChunkMemoryInternal:
12773 * @oldctxt: the existing parsing context
12774 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12775 * @user_data: the user data field for the parser context
12776 * @lst: the return value for the set of parsed nodes
12777 *
12778 *
12779 * Parse a well-balanced chunk of an XML document
12780 * called by the parser
12781 * The allowed sequence for the Well Balanced Chunk is the one defined by
12782 * the content production in the XML grammar:
12783 *
12784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12785 *
12786 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12787 * error code otherwise
12788 *
12789 * In case recover is set to 1, the nodelist will not be empty even if
12790 * the parsed chunk is not well balanced.
12791 */
12792 static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,const xmlChar * string,void * user_data,xmlNodePtr * lst)12793 xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12794 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12795 xmlParserCtxtPtr ctxt;
12796 xmlDocPtr newDoc = NULL;
12797 xmlNodePtr newRoot;
12798 xmlSAXHandlerPtr oldsax = NULL;
12799 xmlNodePtr content = NULL;
12800 xmlNodePtr last = NULL;
12801 int size;
12802 xmlParserErrors ret = XML_ERR_OK;
12803 #ifdef SAX2
12804 int i;
12805 #endif
12806
12807 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12808 (oldctxt->depth > 1024)) {
12809 return(XML_ERR_ENTITY_LOOP);
12810 }
12811
12812
12813 if (lst != NULL)
12814 *lst = NULL;
12815 if (string == NULL)
12816 return(XML_ERR_INTERNAL_ERROR);
12817
12818 size = xmlStrlen(string);
12819
12820 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12821 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12822 if (user_data != NULL)
12823 ctxt->userData = user_data;
12824 else
12825 ctxt->userData = ctxt;
12826 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12827 ctxt->dict = oldctxt->dict;
12828 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12829 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12830 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12831
12832 #ifdef SAX2
12833 /* propagate namespaces down the entity */
12834 for (i = 0;i < oldctxt->nsNr;i += 2) {
12835 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12836 }
12837 #endif
12838
12839 oldsax = ctxt->sax;
12840 ctxt->sax = oldctxt->sax;
12841 xmlDetectSAX2(ctxt);
12842 ctxt->replaceEntities = oldctxt->replaceEntities;
12843 ctxt->options = oldctxt->options;
12844
12845 ctxt->_private = oldctxt->_private;
12846 if (oldctxt->myDoc == NULL) {
12847 newDoc = xmlNewDoc(BAD_CAST "1.0");
12848 if (newDoc == NULL) {
12849 ctxt->sax = oldsax;
12850 ctxt->dict = NULL;
12851 xmlFreeParserCtxt(ctxt);
12852 return(XML_ERR_INTERNAL_ERROR);
12853 }
12854 newDoc->properties = XML_DOC_INTERNAL;
12855 newDoc->dict = ctxt->dict;
12856 xmlDictReference(newDoc->dict);
12857 ctxt->myDoc = newDoc;
12858 } else {
12859 ctxt->myDoc = oldctxt->myDoc;
12860 content = ctxt->myDoc->children;
12861 last = ctxt->myDoc->last;
12862 }
12863 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12864 if (newRoot == NULL) {
12865 ctxt->sax = oldsax;
12866 ctxt->dict = NULL;
12867 xmlFreeParserCtxt(ctxt);
12868 if (newDoc != NULL) {
12869 xmlFreeDoc(newDoc);
12870 }
12871 return(XML_ERR_INTERNAL_ERROR);
12872 }
12873 ctxt->myDoc->children = NULL;
12874 ctxt->myDoc->last = NULL;
12875 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12876 nodePush(ctxt, ctxt->myDoc->children);
12877 ctxt->instate = XML_PARSER_CONTENT;
12878 ctxt->depth = oldctxt->depth + 1;
12879
12880 ctxt->validate = 0;
12881 ctxt->loadsubset = oldctxt->loadsubset;
12882 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12883 /*
12884 * ID/IDREF registration will be done in xmlValidateElement below
12885 */
12886 ctxt->loadsubset |= XML_SKIP_IDS;
12887 }
12888 ctxt->dictNames = oldctxt->dictNames;
12889 ctxt->attsDefault = oldctxt->attsDefault;
12890 ctxt->attsSpecial = oldctxt->attsSpecial;
12891
12892 xmlParseContent(ctxt);
12893 if ((RAW == '<') && (NXT(1) == '/')) {
12894 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12895 } else if (RAW != 0) {
12896 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12897 }
12898 if (ctxt->node != ctxt->myDoc->children) {
12899 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12900 }
12901
12902 if (!ctxt->wellFormed) {
12903 if (ctxt->errNo == 0)
12904 ret = XML_ERR_INTERNAL_ERROR;
12905 else
12906 ret = (xmlParserErrors)ctxt->errNo;
12907 } else {
12908 ret = XML_ERR_OK;
12909 }
12910
12911 if ((lst != NULL) && (ret == XML_ERR_OK)) {
12912 xmlNodePtr cur;
12913
12914 /*
12915 * Return the newly created nodeset after unlinking it from
12916 * they pseudo parent.
12917 */
12918 cur = ctxt->myDoc->children->children;
12919 *lst = cur;
12920 while (cur != NULL) {
12921 #ifdef LIBXML_VALID_ENABLED
12922 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12923 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12924 (cur->type == XML_ELEMENT_NODE)) {
12925 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12926 oldctxt->myDoc, cur);
12927 }
12928 #endif /* LIBXML_VALID_ENABLED */
12929 cur->parent = NULL;
12930 cur = cur->next;
12931 }
12932 ctxt->myDoc->children->children = NULL;
12933 }
12934 if (ctxt->myDoc != NULL) {
12935 xmlFreeNode(ctxt->myDoc->children);
12936 ctxt->myDoc->children = content;
12937 ctxt->myDoc->last = last;
12938 }
12939
12940 /*
12941 * Record in the parent context the number of entities replacement
12942 * done when parsing that reference.
12943 */
12944 if (oldctxt != NULL)
12945 oldctxt->nbentities += ctxt->nbentities;
12946
12947 /*
12948 * Also record the last error if any
12949 */
12950 if (ctxt->lastError.code != XML_ERR_OK)
12951 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12952
12953 ctxt->sax = oldsax;
12954 ctxt->dict = NULL;
12955 ctxt->attsDefault = NULL;
12956 ctxt->attsSpecial = NULL;
12957 xmlFreeParserCtxt(ctxt);
12958 if (newDoc != NULL) {
12959 xmlFreeDoc(newDoc);
12960 }
12961
12962 return(ret);
12963 }
12964
12965 /**
12966 * xmlParseInNodeContext:
12967 * @node: the context node
12968 * @data: the input string
12969 * @datalen: the input string length in bytes
12970 * @options: a combination of xmlParserOption
12971 * @lst: the return value for the set of parsed nodes
12972 *
12973 * Parse a well-balanced chunk of an XML document
12974 * within the context (DTD, namespaces, etc ...) of the given node.
12975 *
12976 * The allowed sequence for the data is a Well Balanced Chunk defined by
12977 * the content production in the XML grammar:
12978 *
12979 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12980 *
12981 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12982 * error code otherwise
12983 */
12984 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * lst)12985 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12986 int options, xmlNodePtr *lst) {
12987 #ifdef SAX2
12988 xmlParserCtxtPtr ctxt;
12989 xmlDocPtr doc = NULL;
12990 xmlNodePtr fake, cur;
12991 int nsnr = 0;
12992
12993 xmlParserErrors ret = XML_ERR_OK;
12994
12995 /*
12996 * check all input parameters, grab the document
12997 */
12998 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12999 return(XML_ERR_INTERNAL_ERROR);
13000 switch (node->type) {
13001 case XML_ELEMENT_NODE:
13002 case XML_ATTRIBUTE_NODE:
13003 case XML_TEXT_NODE:
13004 case XML_CDATA_SECTION_NODE:
13005 case XML_ENTITY_REF_NODE:
13006 case XML_PI_NODE:
13007 case XML_COMMENT_NODE:
13008 case XML_DOCUMENT_NODE:
13009 case XML_HTML_DOCUMENT_NODE:
13010 break;
13011 default:
13012 return(XML_ERR_INTERNAL_ERROR);
13013
13014 }
13015 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13016 (node->type != XML_DOCUMENT_NODE) &&
13017 (node->type != XML_HTML_DOCUMENT_NODE))
13018 node = node->parent;
13019 if (node == NULL)
13020 return(XML_ERR_INTERNAL_ERROR);
13021 if (node->type == XML_ELEMENT_NODE)
13022 doc = node->doc;
13023 else
13024 doc = (xmlDocPtr) node;
13025 if (doc == NULL)
13026 return(XML_ERR_INTERNAL_ERROR);
13027
13028 /*
13029 * allocate a context and set-up everything not related to the
13030 * node position in the tree
13031 */
13032 if (doc->type == XML_DOCUMENT_NODE)
13033 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13034 #ifdef LIBXML_HTML_ENABLED
13035 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13036 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13037 /*
13038 * When parsing in context, it makes no sense to add implied
13039 * elements like html/body/etc...
13040 */
13041 options |= HTML_PARSE_NOIMPLIED;
13042 }
13043 #endif
13044 else
13045 return(XML_ERR_INTERNAL_ERROR);
13046
13047 if (ctxt == NULL)
13048 return(XML_ERR_NO_MEMORY);
13049
13050 /*
13051 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13052 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13053 * we must wait until the last moment to free the original one.
13054 */
13055 if (doc->dict != NULL) {
13056 if (ctxt->dict != NULL)
13057 xmlDictFree(ctxt->dict);
13058 ctxt->dict = doc->dict;
13059 } else
13060 options |= XML_PARSE_NODICT;
13061
13062 if (doc->encoding != NULL) {
13063 xmlCharEncodingHandlerPtr hdlr;
13064
13065 if (ctxt->encoding != NULL)
13066 xmlFree((xmlChar *) ctxt->encoding);
13067 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13068
13069 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13070 if (hdlr != NULL) {
13071 xmlSwitchToEncoding(ctxt, hdlr);
13072 } else {
13073 return(XML_ERR_UNSUPPORTED_ENCODING);
13074 }
13075 }
13076
13077 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13078 xmlDetectSAX2(ctxt);
13079 ctxt->myDoc = doc;
13080
13081 fake = xmlNewComment(NULL);
13082 if (fake == NULL) {
13083 xmlFreeParserCtxt(ctxt);
13084 return(XML_ERR_NO_MEMORY);
13085 }
13086 xmlAddChild(node, fake);
13087
13088 if (node->type == XML_ELEMENT_NODE) {
13089 nodePush(ctxt, node);
13090 /*
13091 * initialize the SAX2 namespaces stack
13092 */
13093 cur = node;
13094 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13095 xmlNsPtr ns = cur->nsDef;
13096 const xmlChar *iprefix, *ihref;
13097
13098 while (ns != NULL) {
13099 if (ctxt->dict) {
13100 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13101 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13102 } else {
13103 iprefix = ns->prefix;
13104 ihref = ns->href;
13105 }
13106
13107 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13108 nsPush(ctxt, iprefix, ihref);
13109 nsnr++;
13110 }
13111 ns = ns->next;
13112 }
13113 cur = cur->parent;
13114 }
13115 ctxt->instate = XML_PARSER_CONTENT;
13116 }
13117
13118 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13119 /*
13120 * ID/IDREF registration will be done in xmlValidateElement below
13121 */
13122 ctxt->loadsubset |= XML_SKIP_IDS;
13123 }
13124
13125 #ifdef LIBXML_HTML_ENABLED
13126 if (doc->type == XML_HTML_DOCUMENT_NODE)
13127 __htmlParseContent(ctxt);
13128 else
13129 #endif
13130 xmlParseContent(ctxt);
13131
13132 nsPop(ctxt, nsnr);
13133 if ((RAW == '<') && (NXT(1) == '/')) {
13134 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13135 } else if (RAW != 0) {
13136 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13137 }
13138 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13139 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13140 ctxt->wellFormed = 0;
13141 }
13142
13143 if (!ctxt->wellFormed) {
13144 if (ctxt->errNo == 0)
13145 ret = XML_ERR_INTERNAL_ERROR;
13146 else
13147 ret = (xmlParserErrors)ctxt->errNo;
13148 } else {
13149 ret = XML_ERR_OK;
13150 }
13151
13152 /*
13153 * Return the newly created nodeset after unlinking it from
13154 * the pseudo sibling.
13155 */
13156
13157 cur = fake->next;
13158 fake->next = NULL;
13159 node->last = fake;
13160
13161 if (cur != NULL) {
13162 cur->prev = NULL;
13163 }
13164
13165 *lst = cur;
13166
13167 while (cur != NULL) {
13168 cur->parent = NULL;
13169 cur = cur->next;
13170 }
13171
13172 xmlUnlinkNode(fake);
13173 xmlFreeNode(fake);
13174
13175
13176 if (ret != XML_ERR_OK) {
13177 xmlFreeNodeList(*lst);
13178 *lst = NULL;
13179 }
13180
13181 if (doc->dict != NULL)
13182 ctxt->dict = NULL;
13183 xmlFreeParserCtxt(ctxt);
13184
13185 return(ret);
13186 #else /* !SAX2 */
13187 return(XML_ERR_INTERNAL_ERROR);
13188 #endif
13189 }
13190
13191 #ifdef LIBXML_SAX1_ENABLED
13192 /**
13193 * xmlParseBalancedChunkMemoryRecover:
13194 * @doc: the document the chunk pertains to
13195 * @sax: the SAX handler bloc (possibly NULL)
13196 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13197 * @depth: Used for loop detection, use 0
13198 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13199 * @lst: the return value for the set of parsed nodes
13200 * @recover: return nodes even if the data is broken (use 0)
13201 *
13202 *
13203 * Parse a well-balanced chunk of an XML document
13204 * called by the parser
13205 * The allowed sequence for the Well Balanced Chunk is the one defined by
13206 * the content production in the XML grammar:
13207 *
13208 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13209 *
13210 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13211 * the parser error code otherwise
13212 *
13213 * In case recover is set to 1, the nodelist will not be empty even if
13214 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13215 * some extent.
13216 */
13217 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst,int recover)13218 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13219 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13220 int recover) {
13221 xmlParserCtxtPtr ctxt;
13222 xmlDocPtr newDoc;
13223 xmlSAXHandlerPtr oldsax = NULL;
13224 xmlNodePtr content, newRoot;
13225 int size;
13226 int ret = 0;
13227
13228 if (depth > 40) {
13229 return(XML_ERR_ENTITY_LOOP);
13230 }
13231
13232
13233 if (lst != NULL)
13234 *lst = NULL;
13235 if (string == NULL)
13236 return(-1);
13237
13238 size = xmlStrlen(string);
13239
13240 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13241 if (ctxt == NULL) return(-1);
13242 ctxt->userData = ctxt;
13243 if (sax != NULL) {
13244 oldsax = ctxt->sax;
13245 ctxt->sax = sax;
13246 if (user_data != NULL)
13247 ctxt->userData = user_data;
13248 }
13249 newDoc = xmlNewDoc(BAD_CAST "1.0");
13250 if (newDoc == NULL) {
13251 xmlFreeParserCtxt(ctxt);
13252 return(-1);
13253 }
13254 newDoc->properties = XML_DOC_INTERNAL;
13255 if ((doc != NULL) && (doc->dict != NULL)) {
13256 xmlDictFree(ctxt->dict);
13257 ctxt->dict = doc->dict;
13258 xmlDictReference(ctxt->dict);
13259 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13260 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13261 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13262 ctxt->dictNames = 1;
13263 } else {
13264 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13265 }
13266 if (doc != NULL) {
13267 newDoc->intSubset = doc->intSubset;
13268 newDoc->extSubset = doc->extSubset;
13269 }
13270 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13271 if (newRoot == NULL) {
13272 if (sax != NULL)
13273 ctxt->sax = oldsax;
13274 xmlFreeParserCtxt(ctxt);
13275 newDoc->intSubset = NULL;
13276 newDoc->extSubset = NULL;
13277 xmlFreeDoc(newDoc);
13278 return(-1);
13279 }
13280 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13281 nodePush(ctxt, newRoot);
13282 if (doc == NULL) {
13283 ctxt->myDoc = newDoc;
13284 } else {
13285 ctxt->myDoc = newDoc;
13286 newDoc->children->doc = doc;
13287 /* Ensure that doc has XML spec namespace */
13288 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13289 newDoc->oldNs = doc->oldNs;
13290 }
13291 ctxt->instate = XML_PARSER_CONTENT;
13292 ctxt->depth = depth;
13293
13294 /*
13295 * Doing validity checking on chunk doesn't make sense
13296 */
13297 ctxt->validate = 0;
13298 ctxt->loadsubset = 0;
13299 xmlDetectSAX2(ctxt);
13300
13301 if ( doc != NULL ){
13302 content = doc->children;
13303 doc->children = NULL;
13304 xmlParseContent(ctxt);
13305 doc->children = content;
13306 }
13307 else {
13308 xmlParseContent(ctxt);
13309 }
13310 if ((RAW == '<') && (NXT(1) == '/')) {
13311 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13312 } else if (RAW != 0) {
13313 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13314 }
13315 if (ctxt->node != newDoc->children) {
13316 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13317 }
13318
13319 if (!ctxt->wellFormed) {
13320 if (ctxt->errNo == 0)
13321 ret = 1;
13322 else
13323 ret = ctxt->errNo;
13324 } else {
13325 ret = 0;
13326 }
13327
13328 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13329 xmlNodePtr cur;
13330
13331 /*
13332 * Return the newly created nodeset after unlinking it from
13333 * they pseudo parent.
13334 */
13335 cur = newDoc->children->children;
13336 *lst = cur;
13337 while (cur != NULL) {
13338 xmlSetTreeDoc(cur, doc);
13339 cur->parent = NULL;
13340 cur = cur->next;
13341 }
13342 newDoc->children->children = NULL;
13343 }
13344
13345 if (sax != NULL)
13346 ctxt->sax = oldsax;
13347 xmlFreeParserCtxt(ctxt);
13348 newDoc->intSubset = NULL;
13349 newDoc->extSubset = NULL;
13350 newDoc->oldNs = NULL;
13351 xmlFreeDoc(newDoc);
13352
13353 return(ret);
13354 }
13355
13356 /**
13357 * xmlSAXParseEntity:
13358 * @sax: the SAX handler block
13359 * @filename: the filename
13360 *
13361 * parse an XML external entity out of context and build a tree.
13362 * It use the given SAX function block to handle the parsing callback.
13363 * If sax is NULL, fallback to the default DOM tree building routines.
13364 *
13365 * [78] extParsedEnt ::= TextDecl? content
13366 *
13367 * This correspond to a "Well Balanced" chunk
13368 *
13369 * Returns the resulting document tree
13370 */
13371
13372 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)13373 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13374 xmlDocPtr ret;
13375 xmlParserCtxtPtr ctxt;
13376
13377 ctxt = xmlCreateFileParserCtxt(filename);
13378 if (ctxt == NULL) {
13379 return(NULL);
13380 }
13381 if (sax != NULL) {
13382 if (ctxt->sax != NULL)
13383 xmlFree(ctxt->sax);
13384 ctxt->sax = sax;
13385 ctxt->userData = NULL;
13386 }
13387
13388 xmlParseExtParsedEnt(ctxt);
13389
13390 if (ctxt->wellFormed)
13391 ret = ctxt->myDoc;
13392 else {
13393 ret = NULL;
13394 xmlFreeDoc(ctxt->myDoc);
13395 ctxt->myDoc = NULL;
13396 }
13397 if (sax != NULL)
13398 ctxt->sax = NULL;
13399 xmlFreeParserCtxt(ctxt);
13400
13401 return(ret);
13402 }
13403
13404 /**
13405 * xmlParseEntity:
13406 * @filename: the filename
13407 *
13408 * parse an XML external entity out of context and build a tree.
13409 *
13410 * [78] extParsedEnt ::= TextDecl? content
13411 *
13412 * This correspond to a "Well Balanced" chunk
13413 *
13414 * Returns the resulting document tree
13415 */
13416
13417 xmlDocPtr
xmlParseEntity(const char * filename)13418 xmlParseEntity(const char *filename) {
13419 return(xmlSAXParseEntity(NULL, filename));
13420 }
13421 #endif /* LIBXML_SAX1_ENABLED */
13422
13423 /**
13424 * xmlCreateEntityParserCtxtInternal:
13425 * @URL: the entity URL
13426 * @ID: the entity PUBLIC ID
13427 * @base: a possible base for the target URI
13428 * @pctx: parser context used to set options on new context
13429 *
13430 * Create a parser context for an external entity
13431 * Automatic support for ZLIB/Compress compressed document is provided
13432 * by default if found at compile-time.
13433 *
13434 * Returns the new parser context or NULL
13435 */
13436 static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar * URL,const xmlChar * ID,const xmlChar * base,xmlParserCtxtPtr pctx)13437 xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13438 const xmlChar *base, xmlParserCtxtPtr pctx) {
13439 xmlParserCtxtPtr ctxt;
13440 xmlParserInputPtr inputStream;
13441 char *directory = NULL;
13442 xmlChar *uri;
13443
13444 ctxt = xmlNewParserCtxt();
13445 if (ctxt == NULL) {
13446 return(NULL);
13447 }
13448
13449 if (pctx != NULL) {
13450 ctxt->options = pctx->options;
13451 ctxt->_private = pctx->_private;
13452 }
13453
13454 uri = xmlBuildURI(URL, base);
13455
13456 if (uri == NULL) {
13457 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13458 if (inputStream == NULL) {
13459 xmlFreeParserCtxt(ctxt);
13460 return(NULL);
13461 }
13462
13463 inputPush(ctxt, inputStream);
13464
13465 if ((ctxt->directory == NULL) && (directory == NULL))
13466 directory = xmlParserGetDirectory((char *)URL);
13467 if ((ctxt->directory == NULL) && (directory != NULL))
13468 ctxt->directory = directory;
13469 } else {
13470 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13471 if (inputStream == NULL) {
13472 xmlFree(uri);
13473 xmlFreeParserCtxt(ctxt);
13474 return(NULL);
13475 }
13476
13477 inputPush(ctxt, inputStream);
13478
13479 if ((ctxt->directory == NULL) && (directory == NULL))
13480 directory = xmlParserGetDirectory((char *)uri);
13481 if ((ctxt->directory == NULL) && (directory != NULL))
13482 ctxt->directory = directory;
13483 xmlFree(uri);
13484 }
13485 return(ctxt);
13486 }
13487
13488 /**
13489 * xmlCreateEntityParserCtxt:
13490 * @URL: the entity URL
13491 * @ID: the entity PUBLIC ID
13492 * @base: a possible base for the target URI
13493 *
13494 * Create a parser context for an external entity
13495 * Automatic support for ZLIB/Compress compressed document is provided
13496 * by default if found at compile-time.
13497 *
13498 * Returns the new parser context or NULL
13499 */
13500 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)13501 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13502 const xmlChar *base) {
13503 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13504
13505 }
13506
13507 /************************************************************************
13508 * *
13509 * Front ends when parsing from a file *
13510 * *
13511 ************************************************************************/
13512
13513 /**
13514 * xmlCreateURLParserCtxt:
13515 * @filename: the filename or URL
13516 * @options: a combination of xmlParserOption
13517 *
13518 * Create a parser context for a file or URL content.
13519 * Automatic support for ZLIB/Compress compressed document is provided
13520 * by default if found at compile-time and for file accesses
13521 *
13522 * Returns the new parser context or NULL
13523 */
13524 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)13525 xmlCreateURLParserCtxt(const char *filename, int options)
13526 {
13527 xmlParserCtxtPtr ctxt;
13528 xmlParserInputPtr inputStream;
13529 char *directory = NULL;
13530
13531 ctxt = xmlNewParserCtxt();
13532 if (ctxt == NULL) {
13533 xmlErrMemory(NULL, "cannot allocate parser context");
13534 return(NULL);
13535 }
13536
13537 if (options)
13538 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13539 ctxt->linenumbers = 1;
13540
13541 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13542 if (inputStream == NULL) {
13543 xmlFreeParserCtxt(ctxt);
13544 return(NULL);
13545 }
13546
13547 inputPush(ctxt, inputStream);
13548 if ((ctxt->directory == NULL) && (directory == NULL))
13549 directory = xmlParserGetDirectory(filename);
13550 if ((ctxt->directory == NULL) && (directory != NULL))
13551 ctxt->directory = directory;
13552
13553 return(ctxt);
13554 }
13555
13556 /**
13557 * xmlCreateFileParserCtxt:
13558 * @filename: the filename
13559 *
13560 * Create a parser context for a file content.
13561 * Automatic support for ZLIB/Compress compressed document is provided
13562 * by default if found at compile-time.
13563 *
13564 * Returns the new parser context or NULL
13565 */
13566 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)13567 xmlCreateFileParserCtxt(const char *filename)
13568 {
13569 return(xmlCreateURLParserCtxt(filename, 0));
13570 }
13571
13572 #ifdef LIBXML_SAX1_ENABLED
13573 /**
13574 * xmlSAXParseFileWithData:
13575 * @sax: the SAX handler block
13576 * @filename: the filename
13577 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13578 * documents
13579 * @data: the userdata
13580 *
13581 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13582 * compressed document is provided by default if found at compile-time.
13583 * It use the given SAX function block to handle the parsing callback.
13584 * If sax is NULL, fallback to the default DOM tree building routines.
13585 *
13586 * User data (void *) is stored within the parser context in the
13587 * context's _private member, so it is available nearly everywhere in libxml
13588 *
13589 * Returns the resulting document tree
13590 */
13591
13592 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)13593 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13594 int recovery, void *data) {
13595 xmlDocPtr ret;
13596 xmlParserCtxtPtr ctxt;
13597
13598 xmlInitParser();
13599
13600 ctxt = xmlCreateFileParserCtxt(filename);
13601 if (ctxt == NULL) {
13602 return(NULL);
13603 }
13604 if (sax != NULL) {
13605 if (ctxt->sax != NULL)
13606 xmlFree(ctxt->sax);
13607 ctxt->sax = sax;
13608 }
13609 xmlDetectSAX2(ctxt);
13610 if (data!=NULL) {
13611 ctxt->_private = data;
13612 }
13613
13614 if (ctxt->directory == NULL)
13615 ctxt->directory = xmlParserGetDirectory(filename);
13616
13617 ctxt->recovery = recovery;
13618
13619 xmlParseDocument(ctxt);
13620
13621 if ((ctxt->wellFormed) || recovery) {
13622 ret = ctxt->myDoc;
13623 if (ret != NULL) {
13624 if (ctxt->input->buf->compressed > 0)
13625 ret->compression = 9;
13626 else
13627 ret->compression = ctxt->input->buf->compressed;
13628 }
13629 }
13630 else {
13631 ret = NULL;
13632 xmlFreeDoc(ctxt->myDoc);
13633 ctxt->myDoc = NULL;
13634 }
13635 if (sax != NULL)
13636 ctxt->sax = NULL;
13637 xmlFreeParserCtxt(ctxt);
13638
13639 return(ret);
13640 }
13641
13642 /**
13643 * xmlSAXParseFile:
13644 * @sax: the SAX handler block
13645 * @filename: the filename
13646 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13647 * documents
13648 *
13649 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13650 * compressed document is provided by default if found at compile-time.
13651 * It use the given SAX function block to handle the parsing callback.
13652 * If sax is NULL, fallback to the default DOM tree building routines.
13653 *
13654 * Returns the resulting document tree
13655 */
13656
13657 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)13658 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13659 int recovery) {
13660 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13661 }
13662
13663 /**
13664 * xmlRecoverDoc:
13665 * @cur: a pointer to an array of xmlChar
13666 *
13667 * parse an XML in-memory document and build a tree.
13668 * In the case the document is not Well Formed, a attempt to build a
13669 * tree is tried anyway
13670 *
13671 * Returns the resulting document tree or NULL in case of failure
13672 */
13673
13674 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)13675 xmlRecoverDoc(const xmlChar *cur) {
13676 return(xmlSAXParseDoc(NULL, cur, 1));
13677 }
13678
13679 /**
13680 * xmlParseFile:
13681 * @filename: the filename
13682 *
13683 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13684 * compressed document is provided by default if found at compile-time.
13685 *
13686 * Returns the resulting document tree if the file was wellformed,
13687 * NULL otherwise.
13688 */
13689
13690 xmlDocPtr
xmlParseFile(const char * filename)13691 xmlParseFile(const char *filename) {
13692 return(xmlSAXParseFile(NULL, filename, 0));
13693 }
13694
13695 /**
13696 * xmlRecoverFile:
13697 * @filename: the filename
13698 *
13699 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13700 * compressed document is provided by default if found at compile-time.
13701 * In the case the document is not Well Formed, it attempts to build
13702 * a tree anyway
13703 *
13704 * Returns the resulting document tree or NULL in case of failure
13705 */
13706
13707 xmlDocPtr
xmlRecoverFile(const char * filename)13708 xmlRecoverFile(const char *filename) {
13709 return(xmlSAXParseFile(NULL, filename, 1));
13710 }
13711
13712
13713 /**
13714 * xmlSetupParserForBuffer:
13715 * @ctxt: an XML parser context
13716 * @buffer: a xmlChar * buffer
13717 * @filename: a file name
13718 *
13719 * Setup the parser context to parse a new buffer; Clears any prior
13720 * contents from the parser context. The buffer parameter must not be
13721 * NULL, but the filename parameter can be
13722 */
13723 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)13724 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13725 const char* filename)
13726 {
13727 xmlParserInputPtr input;
13728
13729 if ((ctxt == NULL) || (buffer == NULL))
13730 return;
13731
13732 input = xmlNewInputStream(ctxt);
13733 if (input == NULL) {
13734 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13735 xmlClearParserCtxt(ctxt);
13736 return;
13737 }
13738
13739 xmlClearParserCtxt(ctxt);
13740 if (filename != NULL)
13741 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13742 input->base = buffer;
13743 input->cur = buffer;
13744 input->end = &buffer[xmlStrlen(buffer)];
13745 inputPush(ctxt, input);
13746 }
13747
13748 /**
13749 * xmlSAXUserParseFile:
13750 * @sax: a SAX handler
13751 * @user_data: The user data returned on SAX callbacks
13752 * @filename: a file name
13753 *
13754 * parse an XML file and call the given SAX handler routines.
13755 * Automatic support for ZLIB/Compress compressed document is provided
13756 *
13757 * Returns 0 in case of success or a error number otherwise
13758 */
13759 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)13760 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13761 const char *filename) {
13762 int ret = 0;
13763 xmlParserCtxtPtr ctxt;
13764
13765 ctxt = xmlCreateFileParserCtxt(filename);
13766 if (ctxt == NULL) return -1;
13767 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13768 xmlFree(ctxt->sax);
13769 ctxt->sax = sax;
13770 xmlDetectSAX2(ctxt);
13771
13772 if (user_data != NULL)
13773 ctxt->userData = user_data;
13774
13775 xmlParseDocument(ctxt);
13776
13777 if (ctxt->wellFormed)
13778 ret = 0;
13779 else {
13780 if (ctxt->errNo != 0)
13781 ret = ctxt->errNo;
13782 else
13783 ret = -1;
13784 }
13785 if (sax != NULL)
13786 ctxt->sax = NULL;
13787 if (ctxt->myDoc != NULL) {
13788 xmlFreeDoc(ctxt->myDoc);
13789 ctxt->myDoc = NULL;
13790 }
13791 xmlFreeParserCtxt(ctxt);
13792
13793 return ret;
13794 }
13795 #endif /* LIBXML_SAX1_ENABLED */
13796
13797 /************************************************************************
13798 * *
13799 * Front ends when parsing from memory *
13800 * *
13801 ************************************************************************/
13802
13803 /**
13804 * xmlCreateMemoryParserCtxt:
13805 * @buffer: a pointer to a char array
13806 * @size: the size of the array
13807 *
13808 * Create a parser context for an XML in-memory document.
13809 *
13810 * Returns the new parser context or NULL
13811 */
13812 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)13813 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13814 xmlParserCtxtPtr ctxt;
13815 xmlParserInputPtr input;
13816 xmlParserInputBufferPtr buf;
13817
13818 if (buffer == NULL)
13819 return(NULL);
13820 if (size <= 0)
13821 return(NULL);
13822
13823 ctxt = xmlNewParserCtxt();
13824 if (ctxt == NULL)
13825 return(NULL);
13826
13827 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
13828 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13829 if (buf == NULL) {
13830 xmlFreeParserCtxt(ctxt);
13831 return(NULL);
13832 }
13833
13834 input = xmlNewInputStream(ctxt);
13835 if (input == NULL) {
13836 xmlFreeParserInputBuffer(buf);
13837 xmlFreeParserCtxt(ctxt);
13838 return(NULL);
13839 }
13840
13841 input->filename = NULL;
13842 input->buf = buf;
13843 input->base = input->buf->buffer->content;
13844 input->cur = input->buf->buffer->content;
13845 input->end = &input->buf->buffer->content[input->buf->buffer->use];
13846
13847 inputPush(ctxt, input);
13848 return(ctxt);
13849 }
13850
13851 #ifdef LIBXML_SAX1_ENABLED
13852 /**
13853 * xmlSAXParseMemoryWithData:
13854 * @sax: the SAX handler block
13855 * @buffer: an pointer to a char array
13856 * @size: the size of the array
13857 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13858 * documents
13859 * @data: the userdata
13860 *
13861 * parse an XML in-memory block and use the given SAX function block
13862 * to handle the parsing callback. If sax is NULL, fallback to the default
13863 * DOM tree building routines.
13864 *
13865 * User data (void *) is stored within the parser context in the
13866 * context's _private member, so it is available nearly everywhere in libxml
13867 *
13868 * Returns the resulting document tree
13869 */
13870
13871 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)13872 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13873 int size, int recovery, void *data) {
13874 xmlDocPtr ret;
13875 xmlParserCtxtPtr ctxt;
13876
13877 xmlInitParser();
13878
13879 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13880 if (ctxt == NULL) return(NULL);
13881 if (sax != NULL) {
13882 if (ctxt->sax != NULL)
13883 xmlFree(ctxt->sax);
13884 ctxt->sax = sax;
13885 }
13886 xmlDetectSAX2(ctxt);
13887 if (data!=NULL) {
13888 ctxt->_private=data;
13889 }
13890
13891 ctxt->recovery = recovery;
13892
13893 xmlParseDocument(ctxt);
13894
13895 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13896 else {
13897 ret = NULL;
13898 xmlFreeDoc(ctxt->myDoc);
13899 ctxt->myDoc = NULL;
13900 }
13901 if (sax != NULL)
13902 ctxt->sax = NULL;
13903 xmlFreeParserCtxt(ctxt);
13904
13905 return(ret);
13906 }
13907
13908 /**
13909 * xmlSAXParseMemory:
13910 * @sax: the SAX handler block
13911 * @buffer: an pointer to a char array
13912 * @size: the size of the array
13913 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13914 * documents
13915 *
13916 * parse an XML in-memory block and use the given SAX function block
13917 * to handle the parsing callback. If sax is NULL, fallback to the default
13918 * DOM tree building routines.
13919 *
13920 * Returns the resulting document tree
13921 */
13922 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)13923 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13924 int size, int recovery) {
13925 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13926 }
13927
13928 /**
13929 * xmlParseMemory:
13930 * @buffer: an pointer to a char array
13931 * @size: the size of the array
13932 *
13933 * parse an XML in-memory block and build a tree.
13934 *
13935 * Returns the resulting document tree
13936 */
13937
xmlParseMemory(const char * buffer,int size)13938 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13939 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13940 }
13941
13942 /**
13943 * xmlRecoverMemory:
13944 * @buffer: an pointer to a char array
13945 * @size: the size of the array
13946 *
13947 * parse an XML in-memory block and build a tree.
13948 * In the case the document is not Well Formed, an attempt to
13949 * build a tree is tried anyway
13950 *
13951 * Returns the resulting document tree or NULL in case of error
13952 */
13953
xmlRecoverMemory(const char * buffer,int size)13954 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13955 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13956 }
13957
13958 /**
13959 * xmlSAXUserParseMemory:
13960 * @sax: a SAX handler
13961 * @user_data: The user data returned on SAX callbacks
13962 * @buffer: an in-memory XML document input
13963 * @size: the length of the XML document in bytes
13964 *
13965 * A better SAX parsing routine.
13966 * parse an XML in-memory buffer and call the given SAX handler routines.
13967 *
13968 * Returns 0 in case of success or a error number otherwise
13969 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13970 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13971 const char *buffer, int size) {
13972 int ret = 0;
13973 xmlParserCtxtPtr ctxt;
13974
13975 xmlInitParser();
13976
13977 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13978 if (ctxt == NULL) return -1;
13979 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13980 xmlFree(ctxt->sax);
13981 ctxt->sax = sax;
13982 xmlDetectSAX2(ctxt);
13983
13984 if (user_data != NULL)
13985 ctxt->userData = user_data;
13986
13987 xmlParseDocument(ctxt);
13988
13989 if (ctxt->wellFormed)
13990 ret = 0;
13991 else {
13992 if (ctxt->errNo != 0)
13993 ret = ctxt->errNo;
13994 else
13995 ret = -1;
13996 }
13997 if (sax != NULL)
13998 ctxt->sax = NULL;
13999 if (ctxt->myDoc != NULL) {
14000 xmlFreeDoc(ctxt->myDoc);
14001 ctxt->myDoc = NULL;
14002 }
14003 xmlFreeParserCtxt(ctxt);
14004
14005 return ret;
14006 }
14007 #endif /* LIBXML_SAX1_ENABLED */
14008
14009 /**
14010 * xmlCreateDocParserCtxt:
14011 * @cur: a pointer to an array of xmlChar
14012 *
14013 * Creates a parser context for an XML in-memory document.
14014 *
14015 * Returns the new parser context or NULL
14016 */
14017 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * cur)14018 xmlCreateDocParserCtxt(const xmlChar *cur) {
14019 int len;
14020
14021 if (cur == NULL)
14022 return(NULL);
14023 len = xmlStrlen(cur);
14024 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14025 }
14026
14027 #ifdef LIBXML_SAX1_ENABLED
14028 /**
14029 * xmlSAXParseDoc:
14030 * @sax: the SAX handler block
14031 * @cur: a pointer to an array of xmlChar
14032 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14033 * documents
14034 *
14035 * parse an XML in-memory document and build a tree.
14036 * It use the given SAX function block to handle the parsing callback.
14037 * If sax is NULL, fallback to the default DOM tree building routines.
14038 *
14039 * Returns the resulting document tree
14040 */
14041
14042 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)14043 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14044 xmlDocPtr ret;
14045 xmlParserCtxtPtr ctxt;
14046 xmlSAXHandlerPtr oldsax = NULL;
14047
14048 if (cur == NULL) return(NULL);
14049
14050
14051 ctxt = xmlCreateDocParserCtxt(cur);
14052 if (ctxt == NULL) return(NULL);
14053 if (sax != NULL) {
14054 oldsax = ctxt->sax;
14055 ctxt->sax = sax;
14056 ctxt->userData = NULL;
14057 }
14058 xmlDetectSAX2(ctxt);
14059
14060 xmlParseDocument(ctxt);
14061 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14062 else {
14063 ret = NULL;
14064 xmlFreeDoc(ctxt->myDoc);
14065 ctxt->myDoc = NULL;
14066 }
14067 if (sax != NULL)
14068 ctxt->sax = oldsax;
14069 xmlFreeParserCtxt(ctxt);
14070
14071 return(ret);
14072 }
14073
14074 /**
14075 * xmlParseDoc:
14076 * @cur: a pointer to an array of xmlChar
14077 *
14078 * parse an XML in-memory document and build a tree.
14079 *
14080 * Returns the resulting document tree
14081 */
14082
14083 xmlDocPtr
xmlParseDoc(const xmlChar * cur)14084 xmlParseDoc(const xmlChar *cur) {
14085 return(xmlSAXParseDoc(NULL, cur, 0));
14086 }
14087 #endif /* LIBXML_SAX1_ENABLED */
14088
14089 #ifdef LIBXML_LEGACY_ENABLED
14090 /************************************************************************
14091 * *
14092 * Specific function to keep track of entities references *
14093 * and used by the XSLT debugger *
14094 * *
14095 ************************************************************************/
14096
14097 static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14098
14099 /**
14100 * xmlAddEntityReference:
14101 * @ent : A valid entity
14102 * @firstNode : A valid first node for children of entity
14103 * @lastNode : A valid last node of children entity
14104 *
14105 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14106 */
14107 static void
xmlAddEntityReference(xmlEntityPtr ent,xmlNodePtr firstNode,xmlNodePtr lastNode)14108 xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14109 xmlNodePtr lastNode)
14110 {
14111 if (xmlEntityRefFunc != NULL) {
14112 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14113 }
14114 }
14115
14116
14117 /**
14118 * xmlSetEntityReferenceFunc:
14119 * @func: A valid function
14120 *
14121 * Set the function to call call back when a xml reference has been made
14122 */
14123 void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)14124 xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14125 {
14126 xmlEntityRefFunc = func;
14127 }
14128 #endif /* LIBXML_LEGACY_ENABLED */
14129
14130 /************************************************************************
14131 * *
14132 * Miscellaneous *
14133 * *
14134 ************************************************************************/
14135
14136 #ifdef LIBXML_XPATH_ENABLED
14137 #include <libxml/xpath.h>
14138 #endif
14139
14140 extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14141 static int xmlParserInitialized = 0;
14142
14143 /**
14144 * xmlInitParser:
14145 *
14146 * Initialization function for the XML parser.
14147 * This is not reentrant. Call once before processing in case of
14148 * use in multithreaded programs.
14149 */
14150
14151 void
xmlInitParser(void)14152 xmlInitParser(void) {
14153 if (xmlParserInitialized != 0)
14154 return;
14155
14156 #ifdef LIBXML_THREAD_ENABLED
14157 __xmlGlobalInitMutexLock();
14158 if (xmlParserInitialized == 0) {
14159 #endif
14160 xmlInitThreads();
14161 xmlInitGlobals();
14162 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14163 (xmlGenericError == NULL))
14164 initGenericErrorDefaultFunc(NULL);
14165 xmlInitMemory();
14166 xmlInitCharEncodingHandlers();
14167 xmlDefaultSAXHandlerInit();
14168 xmlRegisterDefaultInputCallbacks();
14169 #ifdef LIBXML_OUTPUT_ENABLED
14170 xmlRegisterDefaultOutputCallbacks();
14171 #endif /* LIBXML_OUTPUT_ENABLED */
14172 #ifdef LIBXML_HTML_ENABLED
14173 htmlInitAutoClose();
14174 htmlDefaultSAXHandlerInit();
14175 #endif
14176 #ifdef LIBXML_XPATH_ENABLED
14177 xmlXPathInit();
14178 #endif
14179 xmlParserInitialized = 1;
14180 #ifdef LIBXML_THREAD_ENABLED
14181 }
14182 __xmlGlobalInitMutexUnlock();
14183 #endif
14184 }
14185
14186 /**
14187 * xmlCleanupParser:
14188 *
14189 * This function name is somewhat misleading. It does not clean up
14190 * parser state, it cleans up memory allocated by the library itself.
14191 * It is a cleanup function for the XML library. It tries to reclaim all
14192 * related global memory allocated for the library processing.
14193 * It doesn't deallocate any document related memory. One should
14194 * call xmlCleanupParser() only when the process has finished using
14195 * the library and all XML/HTML documents built with it.
14196 * See also xmlInitParser() which has the opposite function of preparing
14197 * the library for operations.
14198 *
14199 * WARNING: if your application is multithreaded or has plugin support
14200 * calling this may crash the application if another thread or
14201 * a plugin is still using libxml2. It's sometimes very hard to
14202 * guess if libxml2 is in use in the application, some libraries
14203 * or plugins may use it without notice. In case of doubt abstain
14204 * from calling this function or do it just before calling exit()
14205 * to avoid leak reports from valgrind !
14206 */
14207
14208 void
xmlCleanupParser(void)14209 xmlCleanupParser(void) {
14210 if (!xmlParserInitialized)
14211 return;
14212
14213 xmlCleanupCharEncodingHandlers();
14214 #ifdef LIBXML_CATALOG_ENABLED
14215 xmlCatalogCleanup();
14216 #endif
14217 xmlDictCleanup();
14218 xmlCleanupInputCallbacks();
14219 #ifdef LIBXML_OUTPUT_ENABLED
14220 xmlCleanupOutputCallbacks();
14221 #endif
14222 #ifdef LIBXML_SCHEMAS_ENABLED
14223 xmlSchemaCleanupTypes();
14224 xmlRelaxNGCleanupTypes();
14225 #endif
14226 xmlCleanupGlobals();
14227 xmlResetLastError();
14228 xmlCleanupThreads(); /* must be last if called not from the main thread */
14229 xmlCleanupMemory();
14230 xmlParserInitialized = 0;
14231 }
14232
14233 /************************************************************************
14234 * *
14235 * New set (2.6.0) of simpler and more flexible APIs *
14236 * *
14237 ************************************************************************/
14238
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; when it is NULL the string is always freed.
 * NULL strings are ignored.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement, including inside an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14250
14251 /**
14252 * xmlCtxtReset:
14253 * @ctxt: an XML parser context
14254 *
14255 * Reset a parser context
14256 */
14257 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)14258 xmlCtxtReset(xmlParserCtxtPtr ctxt)
14259 {
14260 xmlParserInputPtr input;
14261 xmlDictPtr dict;
14262
14263 if (ctxt == NULL)
14264 return;
14265
14266 dict = ctxt->dict;
14267
14268 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14269 xmlFreeInputStream(input);
14270 }
14271 ctxt->inputNr = 0;
14272 ctxt->input = NULL;
14273
14274 ctxt->spaceNr = 0;
14275 if (ctxt->spaceTab != NULL) {
14276 ctxt->spaceTab[0] = -1;
14277 ctxt->space = &ctxt->spaceTab[0];
14278 } else {
14279 ctxt->space = NULL;
14280 }
14281
14282
14283 ctxt->nodeNr = 0;
14284 ctxt->node = NULL;
14285
14286 ctxt->nameNr = 0;
14287 ctxt->name = NULL;
14288
14289 DICT_FREE(ctxt->version);
14290 ctxt->version = NULL;
14291 DICT_FREE(ctxt->encoding);
14292 ctxt->encoding = NULL;
14293 DICT_FREE(ctxt->directory);
14294 ctxt->directory = NULL;
14295 DICT_FREE(ctxt->extSubURI);
14296 ctxt->extSubURI = NULL;
14297 DICT_FREE(ctxt->extSubSystem);
14298 ctxt->extSubSystem = NULL;
14299 if (ctxt->myDoc != NULL)
14300 xmlFreeDoc(ctxt->myDoc);
14301 ctxt->myDoc = NULL;
14302
14303 ctxt->standalone = -1;
14304 ctxt->hasExternalSubset = 0;
14305 ctxt->hasPErefs = 0;
14306 ctxt->html = 0;
14307 ctxt->external = 0;
14308 ctxt->instate = XML_PARSER_START;
14309 ctxt->token = 0;
14310
14311 ctxt->wellFormed = 1;
14312 ctxt->nsWellFormed = 1;
14313 ctxt->disableSAX = 0;
14314 ctxt->valid = 1;
14315 #if 0
14316 ctxt->vctxt.userData = ctxt;
14317 ctxt->vctxt.error = xmlParserValidityError;
14318 ctxt->vctxt.warning = xmlParserValidityWarning;
14319 #endif
14320 ctxt->record_info = 0;
14321 ctxt->nbChars = 0;
14322 ctxt->checkIndex = 0;
14323 ctxt->inSubset = 0;
14324 ctxt->errNo = XML_ERR_OK;
14325 ctxt->depth = 0;
14326 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14327 ctxt->catalogs = NULL;
14328 ctxt->nbentities = 0;
14329 ctxt->sizeentities = 0;
14330 xmlInitNodeInfoSeq(&ctxt->node_seq);
14331
14332 if (ctxt->attsDefault != NULL) {
14333 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14334 ctxt->attsDefault = NULL;
14335 }
14336 if (ctxt->attsSpecial != NULL) {
14337 xmlHashFree(ctxt->attsSpecial, NULL);
14338 ctxt->attsSpecial = NULL;
14339 }
14340
14341 #ifdef LIBXML_CATALOG_ENABLED
14342 if (ctxt->catalogs != NULL)
14343 xmlCatalogFreeLocal(ctxt->catalogs);
14344 #endif
14345 if (ctxt->lastError.code != XML_ERR_OK)
14346 xmlResetError(&ctxt->lastError);
14347 }
14348
14349 /**
14350 * xmlCtxtResetPush:
14351 * @ctxt: an XML parser context
14352 * @chunk: a pointer to an array of chars
14353 * @size: number of chars in the array
14354 * @filename: an optional file name or URI
14355 * @encoding: the document encoding, or NULL
14356 *
14357 * Reset a push parser context
14358 *
14359 * Returns 0 in case of success and 1 in case of error
14360 */
14361 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)14362 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14363 int size, const char *filename, const char *encoding)
14364 {
14365 xmlParserInputPtr inputStream;
14366 xmlParserInputBufferPtr buf;
14367 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14368
14369 if (ctxt == NULL)
14370 return(1);
14371
14372 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14373 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14374
14375 buf = xmlAllocParserInputBuffer(enc);
14376 if (buf == NULL)
14377 return(1);
14378
14379 if (ctxt == NULL) {
14380 xmlFreeParserInputBuffer(buf);
14381 return(1);
14382 }
14383
14384 xmlCtxtReset(ctxt);
14385
14386 if (ctxt->pushTab == NULL) {
14387 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14388 sizeof(xmlChar *));
14389 if (ctxt->pushTab == NULL) {
14390 xmlErrMemory(ctxt, NULL);
14391 xmlFreeParserInputBuffer(buf);
14392 return(1);
14393 }
14394 }
14395
14396 if (filename == NULL) {
14397 ctxt->directory = NULL;
14398 } else {
14399 ctxt->directory = xmlParserGetDirectory(filename);
14400 }
14401
14402 inputStream = xmlNewInputStream(ctxt);
14403 if (inputStream == NULL) {
14404 xmlFreeParserInputBuffer(buf);
14405 return(1);
14406 }
14407
14408 if (filename == NULL)
14409 inputStream->filename = NULL;
14410 else
14411 inputStream->filename = (char *)
14412 xmlCanonicPath((const xmlChar *) filename);
14413 inputStream->buf = buf;
14414 inputStream->base = inputStream->buf->buffer->content;
14415 inputStream->cur = inputStream->buf->buffer->content;
14416 inputStream->end =
14417 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14418
14419 inputPush(ctxt, inputStream);
14420
14421 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14422 (ctxt->input->buf != NULL)) {
14423 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14424 int cur = ctxt->input->cur - ctxt->input->base;
14425
14426 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14427
14428 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14429 ctxt->input->cur = ctxt->input->base + cur;
14430 ctxt->input->end =
14431 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14432 use];
14433 #ifdef DEBUG_PUSH
14434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14435 #endif
14436 }
14437
14438 if (encoding != NULL) {
14439 xmlCharEncodingHandlerPtr hdlr;
14440
14441 if (ctxt->encoding != NULL)
14442 xmlFree((xmlChar *) ctxt->encoding);
14443 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14444
14445 hdlr = xmlFindCharEncodingHandler(encoding);
14446 if (hdlr != NULL) {
14447 xmlSwitchToEncoding(ctxt, hdlr);
14448 } else {
14449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14450 "Unsupported encoding %s\n", BAD_CAST encoding);
14451 }
14452 } else if (enc != XML_CHAR_ENCODING_NONE) {
14453 xmlSwitchEncoding(ctxt, enc);
14454 }
14455
14456 return(0);
14457 }
14458
14459
14460 /**
14461 * xmlCtxtUseOptionsInternal:
14462 * @ctxt: an XML parser context
14463 * @options: a combination of xmlParserOption
14464 * @encoding: the user provided encoding to use
14465 *
14466 * Applies the options to the parser context
14467 *
14468 * Returns 0 in case of success, the set of unknown or unimplemented options
14469 * in case of error.
14470 */
14471 static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt,int options,const char * encoding)14472 xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14473 {
14474 if (ctxt == NULL)
14475 return(-1);
14476 if (encoding != NULL) {
14477 if (ctxt->encoding != NULL)
14478 xmlFree((xmlChar *) ctxt->encoding);
14479 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14480 }
14481 if (options & XML_PARSE_RECOVER) {
14482 ctxt->recovery = 1;
14483 options -= XML_PARSE_RECOVER;
14484 ctxt->options |= XML_PARSE_RECOVER;
14485 } else
14486 ctxt->recovery = 0;
14487 if (options & XML_PARSE_DTDLOAD) {
14488 ctxt->loadsubset = XML_DETECT_IDS;
14489 options -= XML_PARSE_DTDLOAD;
14490 ctxt->options |= XML_PARSE_DTDLOAD;
14491 } else
14492 ctxt->loadsubset = 0;
14493 if (options & XML_PARSE_DTDATTR) {
14494 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14495 options -= XML_PARSE_DTDATTR;
14496 ctxt->options |= XML_PARSE_DTDATTR;
14497 }
14498 if (options & XML_PARSE_NOENT) {
14499 ctxt->replaceEntities = 1;
14500 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14501 options -= XML_PARSE_NOENT;
14502 ctxt->options |= XML_PARSE_NOENT;
14503 } else
14504 ctxt->replaceEntities = 0;
14505 if (options & XML_PARSE_PEDANTIC) {
14506 ctxt->pedantic = 1;
14507 options -= XML_PARSE_PEDANTIC;
14508 ctxt->options |= XML_PARSE_PEDANTIC;
14509 } else
14510 ctxt->pedantic = 0;
14511 if (options & XML_PARSE_NOBLANKS) {
14512 ctxt->keepBlanks = 0;
14513 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14514 options -= XML_PARSE_NOBLANKS;
14515 ctxt->options |= XML_PARSE_NOBLANKS;
14516 } else
14517 ctxt->keepBlanks = 1;
14518 if (options & XML_PARSE_DTDVALID) {
14519 ctxt->validate = 1;
14520 if (options & XML_PARSE_NOWARNING)
14521 ctxt->vctxt.warning = NULL;
14522 if (options & XML_PARSE_NOERROR)
14523 ctxt->vctxt.error = NULL;
14524 options -= XML_PARSE_DTDVALID;
14525 ctxt->options |= XML_PARSE_DTDVALID;
14526 } else
14527 ctxt->validate = 0;
14528 if (options & XML_PARSE_NOWARNING) {
14529 ctxt->sax->warning = NULL;
14530 options -= XML_PARSE_NOWARNING;
14531 }
14532 if (options & XML_PARSE_NOERROR) {
14533 ctxt->sax->error = NULL;
14534 ctxt->sax->fatalError = NULL;
14535 options -= XML_PARSE_NOERROR;
14536 }
14537 #ifdef LIBXML_SAX1_ENABLED
14538 if (options & XML_PARSE_SAX1) {
14539 ctxt->sax->startElement = xmlSAX2StartElement;
14540 ctxt->sax->endElement = xmlSAX2EndElement;
14541 ctxt->sax->startElementNs = NULL;
14542 ctxt->sax->endElementNs = NULL;
14543 ctxt->sax->initialized = 1;
14544 options -= XML_PARSE_SAX1;
14545 ctxt->options |= XML_PARSE_SAX1;
14546 }
14547 #endif /* LIBXML_SAX1_ENABLED */
14548 if (options & XML_PARSE_NODICT) {
14549 ctxt->dictNames = 0;
14550 options -= XML_PARSE_NODICT;
14551 ctxt->options |= XML_PARSE_NODICT;
14552 } else {
14553 ctxt->dictNames = 1;
14554 }
14555 if (options & XML_PARSE_NOCDATA) {
14556 ctxt->sax->cdataBlock = NULL;
14557 options -= XML_PARSE_NOCDATA;
14558 ctxt->options |= XML_PARSE_NOCDATA;
14559 }
14560 if (options & XML_PARSE_NSCLEAN) {
14561 ctxt->options |= XML_PARSE_NSCLEAN;
14562 options -= XML_PARSE_NSCLEAN;
14563 }
14564 if (options & XML_PARSE_NONET) {
14565 ctxt->options |= XML_PARSE_NONET;
14566 options -= XML_PARSE_NONET;
14567 }
14568 if (options & XML_PARSE_COMPACT) {
14569 ctxt->options |= XML_PARSE_COMPACT;
14570 options -= XML_PARSE_COMPACT;
14571 }
14572 if (options & XML_PARSE_OLD10) {
14573 ctxt->options |= XML_PARSE_OLD10;
14574 options -= XML_PARSE_OLD10;
14575 }
14576 if (options & XML_PARSE_NOBASEFIX) {
14577 ctxt->options |= XML_PARSE_NOBASEFIX;
14578 options -= XML_PARSE_NOBASEFIX;
14579 }
14580 if (options & XML_PARSE_HUGE) {
14581 ctxt->options |= XML_PARSE_HUGE;
14582 options -= XML_PARSE_HUGE;
14583 }
14584 if (options & XML_PARSE_OLDSAX) {
14585 ctxt->options |= XML_PARSE_OLDSAX;
14586 options -= XML_PARSE_OLDSAX;
14587 }
14588 if (options & XML_PARSE_IGNORE_ENC) {
14589 ctxt->options |= XML_PARSE_IGNORE_ENC;
14590 options -= XML_PARSE_IGNORE_ENC;
14591 }
14592 ctxt->linenumbers = 1;
14593 return (options);
14594 }
14595
14596 /**
14597 * xmlCtxtUseOptions:
14598 * @ctxt: an XML parser context
14599 * @options: a combination of xmlParserOption
14600 *
14601 * Applies the options to the parser context
14602 *
14603 * Returns 0 in case of success, the set of unknown or unimplemented options
14604 * in case of error.
14605 */
14606 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)14607 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14608 {
14609 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14610 }
14611
14612 /**
14613 * xmlDoRead:
14614 * @ctxt: an XML parser context
14615 * @URL: the base URL to use for the document
14616 * @encoding: the document encoding, or NULL
14617 * @options: a combination of xmlParserOption
14618 * @reuse: keep the context for reuse
14619 *
14620 * Common front-end for the xmlRead functions
14621 *
14622 * Returns the resulting document tree or NULL
14623 */
14624 static xmlDocPtr
xmlDoRead(xmlParserCtxtPtr ctxt,const char * URL,const char * encoding,int options,int reuse)14625 xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14626 int options, int reuse)
14627 {
14628 xmlDocPtr ret;
14629
14630 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14631 if (encoding != NULL) {
14632 xmlCharEncodingHandlerPtr hdlr;
14633
14634 hdlr = xmlFindCharEncodingHandler(encoding);
14635 if (hdlr != NULL)
14636 xmlSwitchToEncoding(ctxt, hdlr);
14637 }
14638 if ((URL != NULL) && (ctxt->input != NULL) &&
14639 (ctxt->input->filename == NULL))
14640 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14641 xmlParseDocument(ctxt);
14642 if ((ctxt->wellFormed) || ctxt->recovery)
14643 ret = ctxt->myDoc;
14644 else {
14645 ret = NULL;
14646 if (ctxt->myDoc != NULL) {
14647 xmlFreeDoc(ctxt->myDoc);
14648 }
14649 }
14650 ctxt->myDoc = NULL;
14651 if (!reuse) {
14652 xmlFreeParserCtxt(ctxt);
14653 }
14654
14655 return (ret);
14656 }
14657
14658 /**
14659 * xmlReadDoc:
14660 * @cur: a pointer to a zero terminated string
14661 * @URL: the base URL to use for the document
14662 * @encoding: the document encoding, or NULL
14663 * @options: a combination of xmlParserOption
14664 *
14665 * parse an XML in-memory document and build a tree.
14666 *
14667 * Returns the resulting document tree
14668 */
14669 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)14670 xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14671 {
14672 xmlParserCtxtPtr ctxt;
14673
14674 if (cur == NULL)
14675 return (NULL);
14676
14677 ctxt = xmlCreateDocParserCtxt(cur);
14678 if (ctxt == NULL)
14679 return (NULL);
14680 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14681 }
14682
14683 /**
14684 * xmlReadFile:
14685 * @filename: a file or URL
14686 * @encoding: the document encoding, or NULL
14687 * @options: a combination of xmlParserOption
14688 *
14689 * parse an XML file from the filesystem or the network.
14690 *
14691 * Returns the resulting document tree
14692 */
14693 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)14694 xmlReadFile(const char *filename, const char *encoding, int options)
14695 {
14696 xmlParserCtxtPtr ctxt;
14697
14698 ctxt = xmlCreateURLParserCtxt(filename, options);
14699 if (ctxt == NULL)
14700 return (NULL);
14701 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14702 }
14703
14704 /**
14705 * xmlReadMemory:
14706 * @buffer: a pointer to a char array
14707 * @size: the size of the array
14708 * @URL: the base URL to use for the document
14709 * @encoding: the document encoding, or NULL
14710 * @options: a combination of xmlParserOption
14711 *
14712 * parse an XML in-memory document and build a tree.
14713 *
14714 * Returns the resulting document tree
14715 */
14716 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * URL,const char * encoding,int options)14717 xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14718 {
14719 xmlParserCtxtPtr ctxt;
14720
14721 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14722 if (ctxt == NULL)
14723 return (NULL);
14724 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14725 }
14726
14727 /**
14728 * xmlReadFd:
14729 * @fd: an open file descriptor
14730 * @URL: the base URL to use for the document
14731 * @encoding: the document encoding, or NULL
14732 * @options: a combination of xmlParserOption
14733 *
14734 * parse an XML from a file descriptor and build a tree.
14735 * NOTE that the file descriptor will not be closed when the
14736 * reader is closed or reset.
14737 *
14738 * Returns the resulting document tree
14739 */
14740 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)14741 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14742 {
14743 xmlParserCtxtPtr ctxt;
14744 xmlParserInputBufferPtr input;
14745 xmlParserInputPtr stream;
14746
14747 if (fd < 0)
14748 return (NULL);
14749
14750 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14751 if (input == NULL)
14752 return (NULL);
14753 input->closecallback = NULL;
14754 ctxt = xmlNewParserCtxt();
14755 if (ctxt == NULL) {
14756 xmlFreeParserInputBuffer(input);
14757 return (NULL);
14758 }
14759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14760 if (stream == NULL) {
14761 xmlFreeParserInputBuffer(input);
14762 xmlFreeParserCtxt(ctxt);
14763 return (NULL);
14764 }
14765 inputPush(ctxt, stream);
14766 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14767 }
14768
14769 /**
14770 * xmlReadIO:
14771 * @ioread: an I/O read function
14772 * @ioclose: an I/O close function
14773 * @ioctx: an I/O handler
14774 * @URL: the base URL to use for the document
14775 * @encoding: the document encoding, or NULL
14776 * @options: a combination of xmlParserOption
14777 *
14778 * parse an XML document from I/O functions and source and build a tree.
14779 *
14780 * Returns the resulting document tree
14781 */
14782 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14783 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14784 void *ioctx, const char *URL, const char *encoding, int options)
14785 {
14786 xmlParserCtxtPtr ctxt;
14787 xmlParserInputBufferPtr input;
14788 xmlParserInputPtr stream;
14789
14790 if (ioread == NULL)
14791 return (NULL);
14792
14793 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14794 XML_CHAR_ENCODING_NONE);
14795 if (input == NULL)
14796 return (NULL);
14797 ctxt = xmlNewParserCtxt();
14798 if (ctxt == NULL) {
14799 xmlFreeParserInputBuffer(input);
14800 return (NULL);
14801 }
14802 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14803 if (stream == NULL) {
14804 xmlFreeParserInputBuffer(input);
14805 xmlFreeParserCtxt(ctxt);
14806 return (NULL);
14807 }
14808 inputPush(ctxt, stream);
14809 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14810 }
14811
14812 /**
14813 * xmlCtxtReadDoc:
14814 * @ctxt: an XML parser context
14815 * @cur: a pointer to a zero terminated string
14816 * @URL: the base URL to use for the document
14817 * @encoding: the document encoding, or NULL
14818 * @options: a combination of xmlParserOption
14819 *
14820 * parse an XML in-memory document and build a tree.
14821 * This reuses the existing @ctxt parser context
14822 *
14823 * Returns the resulting document tree
14824 */
14825 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * cur,const char * URL,const char * encoding,int options)14826 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14827 const char *URL, const char *encoding, int options)
14828 {
14829 xmlParserInputPtr stream;
14830
14831 if (cur == NULL)
14832 return (NULL);
14833 if (ctxt == NULL)
14834 return (NULL);
14835
14836 xmlCtxtReset(ctxt);
14837
14838 stream = xmlNewStringInputStream(ctxt, cur);
14839 if (stream == NULL) {
14840 return (NULL);
14841 }
14842 inputPush(ctxt, stream);
14843 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14844 }
14845
14846 /**
14847 * xmlCtxtReadFile:
14848 * @ctxt: an XML parser context
14849 * @filename: a file or URL
14850 * @encoding: the document encoding, or NULL
14851 * @options: a combination of xmlParserOption
14852 *
14853 * parse an XML file from the filesystem or the network.
14854 * This reuses the existing @ctxt parser context
14855 *
14856 * Returns the resulting document tree
14857 */
14858 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)14859 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14860 const char *encoding, int options)
14861 {
14862 xmlParserInputPtr stream;
14863
14864 if (filename == NULL)
14865 return (NULL);
14866 if (ctxt == NULL)
14867 return (NULL);
14868
14869 xmlCtxtReset(ctxt);
14870
14871 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14872 if (stream == NULL) {
14873 return (NULL);
14874 }
14875 inputPush(ctxt, stream);
14876 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14877 }
14878
14879 /**
14880 * xmlCtxtReadMemory:
14881 * @ctxt: an XML parser context
14882 * @buffer: a pointer to a char array
14883 * @size: the size of the array
14884 * @URL: the base URL to use for the document
14885 * @encoding: the document encoding, or NULL
14886 * @options: a combination of xmlParserOption
14887 *
14888 * parse an XML in-memory document and build a tree.
14889 * This reuses the existing @ctxt parser context
14890 *
14891 * Returns the resulting document tree
14892 */
14893 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14894 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14895 const char *URL, const char *encoding, int options)
14896 {
14897 xmlParserInputBufferPtr input;
14898 xmlParserInputPtr stream;
14899
14900 if (ctxt == NULL)
14901 return (NULL);
14902 if (buffer == NULL)
14903 return (NULL);
14904
14905 xmlCtxtReset(ctxt);
14906
14907 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14908 if (input == NULL) {
14909 return(NULL);
14910 }
14911
14912 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14913 if (stream == NULL) {
14914 xmlFreeParserInputBuffer(input);
14915 return(NULL);
14916 }
14917
14918 inputPush(ctxt, stream);
14919 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14920 }
14921
14922 /**
14923 * xmlCtxtReadFd:
14924 * @ctxt: an XML parser context
14925 * @fd: an open file descriptor
14926 * @URL: the base URL to use for the document
14927 * @encoding: the document encoding, or NULL
14928 * @options: a combination of xmlParserOption
14929 *
14930 * parse an XML from a file descriptor and build a tree.
14931 * This reuses the existing @ctxt parser context
14932 * NOTE that the file descriptor will not be closed when the
14933 * reader is closed or reset.
14934 *
14935 * Returns the resulting document tree
14936 */
14937 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14938 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14939 const char *URL, const char *encoding, int options)
14940 {
14941 xmlParserInputBufferPtr input;
14942 xmlParserInputPtr stream;
14943
14944 if (fd < 0)
14945 return (NULL);
14946 if (ctxt == NULL)
14947 return (NULL);
14948
14949 xmlCtxtReset(ctxt);
14950
14951
14952 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14953 if (input == NULL)
14954 return (NULL);
14955 input->closecallback = NULL;
14956 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14957 if (stream == NULL) {
14958 xmlFreeParserInputBuffer(input);
14959 return (NULL);
14960 }
14961 inputPush(ctxt, stream);
14962 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14963 }
14964
14965 /**
14966 * xmlCtxtReadIO:
14967 * @ctxt: an XML parser context
14968 * @ioread: an I/O read function
14969 * @ioclose: an I/O close function
14970 * @ioctx: an I/O handler
14971 * @URL: the base URL to use for the document
14972 * @encoding: the document encoding, or NULL
14973 * @options: a combination of xmlParserOption
14974 *
14975 * parse an XML document from I/O functions and source and build a tree.
14976 * This reuses the existing @ctxt parser context
14977 *
14978 * Returns the resulting document tree
14979 */
14980 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14981 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14982 xmlInputCloseCallback ioclose, void *ioctx,
14983 const char *URL,
14984 const char *encoding, int options)
14985 {
14986 xmlParserInputBufferPtr input;
14987 xmlParserInputPtr stream;
14988
14989 if (ioread == NULL)
14990 return (NULL);
14991 if (ctxt == NULL)
14992 return (NULL);
14993
14994 xmlCtxtReset(ctxt);
14995
14996 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14997 XML_CHAR_ENCODING_NONE);
14998 if (input == NULL)
14999 return (NULL);
15000 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15001 if (stream == NULL) {
15002 xmlFreeParserInputBuffer(input);
15003 return (NULL);
15004 }
15005 inputPush(ctxt, stream);
15006 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15007 }
15008
15009 #define bottom_parser
15010 #include "elfgcchack.h"
15011