1 /* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10 #ifndef __XML_PARSER_H__ 11 #define __XML_PARSER_H__ 12 13 #include <stdarg.h> 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/tree.h> 17 #include <libxml/dict.h> 18 #include <libxml/hash.h> 19 #include <libxml/valid.h> 20 #include <libxml/entities.h> 21 #include <libxml/xmlerror.h> 22 #include <libxml/xmlstring.h> 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 /** 29 * XML_DEFAULT_VERSION: 30 * 31 * The default version of XML used: 1.0 32 */ 33 #define XML_DEFAULT_VERSION "1.0" 34 35 /** 36 * xmlParserInput: 37 * 38 * An xmlParserInput is an input flow for the XML processor. 39 * Each entity parsed is associated an xmlParserInput (except the 40 * few predefined ones). This is the case both for internal entities 41 * - in which case the flow is already completely in memory - or 42 * external entities - in which case we use the buf structure for 43 * progressive reading and I18N conversions to the internal UTF-8 format. 44 */ 45 46 /** 47 * xmlParserInputDeallocate: 48 * @str: the string to deallocate 49 * 50 * Callback for freeing some parser input allocations. 51 */ 52 typedef void (* xmlParserInputDeallocate)(xmlChar *str); 53 54 struct _xmlParserInput { 55 /* Input buffer */ 56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 57 58 const char *filename; /* The file analyzed, if any */ 59 const char *directory; /* the directory/base of the file */ 60 const xmlChar *base; /* Base of the array to parse */ 61 const xmlChar *cur; /* Current char being parsed */ 62 const xmlChar *end; /* end of the array to parse */ 63 int length; /* length if known */ 64 int line; /* Current line */ 65 int col; /* Current column */ 66 /* 67 * NOTE: consumed is only tested for equality in the parser code, 68 * so even if there is an overflow this should not give troubles 69 * for parsing very large instances. 70 */ 71 unsigned long consumed; /* How many xmlChars already consumed */ 72 xmlParserInputDeallocate free; /* function to deallocate the base */ 73 const xmlChar *encoding; /* the encoding string for entity */ 74 const xmlChar *version; /* the version string for entity */ 75 int standalone; /* Was that entity marked standalone */ 76 int id; /* an unique identifier for the entity */ 77 }; 78 79 /** 80 * xmlParserNodeInfo: 81 * 82 * The parser can be asked to collect Node information, i.e. at what 83 * place in the file they were detected. 84 * NOTE: This is off by default and not very well tested. 85 */ 86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 88 89 struct _xmlParserNodeInfo { 90 const struct _xmlNode* node; 91 /* Position & line # that text that created the node begins & ends on */ 92 unsigned long begin_pos; 93 unsigned long begin_line; 94 unsigned long end_pos; 95 unsigned long end_line; 96 }; 97 98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 100 struct _xmlParserNodeInfoSeq { 101 unsigned long maximum; 102 unsigned long length; 103 xmlParserNodeInfo* buffer; 104 }; 105 106 /** 107 * xmlParserInputState: 108 * 109 * The parser is now working also as a state based parser. 110 * The recursive one use the state info for entities processing. 111 */ 112 typedef enum { 113 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 114 XML_PARSER_START = 0, /* nothing has been parsed */ 115 XML_PARSER_MISC, /* Misc* before int subset */ 116 XML_PARSER_PI, /* Within a processing instruction */ 117 XML_PARSER_DTD, /* within some DTD content */ 118 XML_PARSER_PROLOG, /* Misc* after internal subset */ 119 XML_PARSER_COMMENT, /* within a comment */ 120 XML_PARSER_START_TAG, /* within a start tag */ 121 XML_PARSER_CONTENT, /* within the content */ 122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 123 XML_PARSER_END_TAG, /* within a closing tag */ 124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 129 XML_PARSER_IGNORE, /* within an IGNORED section */ 130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 131 } xmlParserInputState; 132 133 /** 134 * XML_DETECT_IDS: 135 * 136 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 137 * Use it to initialize xmlLoadExtDtdDefaultValue. 138 */ 139 #define XML_DETECT_IDS 2 140 141 /** 142 * XML_COMPLETE_ATTRS: 143 * 144 * Bit in the loadsubset context field to tell to do complete the 145 * elements attributes lists with the ones defaulted from the DTDs. 146 * Use it to initialize xmlLoadExtDtdDefaultValue. 147 */ 148 #define XML_COMPLETE_ATTRS 4 149 150 /** 151 * XML_SKIP_IDS: 152 * 153 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 155 */ 156 #define XML_SKIP_IDS 8 157 158 /** 159 * xmlParserMode: 160 * 161 * A parser can operate in various modes 162 */ 163 typedef enum { 164 XML_PARSE_UNKNOWN = 0, 165 XML_PARSE_DOM = 1, 166 XML_PARSE_SAX = 2, 167 XML_PARSE_PUSH_DOM = 3, 168 XML_PARSE_PUSH_SAX = 4, 169 XML_PARSE_READER = 5 170 } xmlParserMode; 171 172 typedef struct _xmlStartTag xmlStartTag; 173 174 /** 175 * xmlParserCtxt: 176 * 177 * The parser context. 178 * NOTE This doesn't completely define the parser state, the (current ?) 179 * design of the parser uses recursive function calls since this allow 180 * and easy mapping from the production rules of the specification 181 * to the actual code. The drawback is that the actual function call 182 * also reflect the parser state. However most of the parsing routines 183 * takes as the only argument the parser context pointer, so migrating 184 * to a state based parser for progressive parsing shouldn't be too hard. 185 */ 186 struct _xmlParserCtxt { 187 struct _xmlSAXHandler *sax; /* The SAX handler */ 188 void *userData; /* For SAX interface only, used by DOM build */ 189 xmlDocPtr myDoc; /* the document being built */ 190 int wellFormed; /* is the document well formed */ 191 int replaceEntities; /* shall we replace entities ? */ 192 const xmlChar *version; /* the XML version string */ 193 const xmlChar *encoding; /* the declared encoding, if any */ 194 int standalone; /* standalone document */ 195 int html; /* an HTML(1)/Docbook(2) document 196 * 3 is HTML after <head> 197 * 10 is HTML after <body> 198 */ 199 200 /* Input stream stack */ 201 xmlParserInputPtr input; /* Current input stream */ 202 int inputNr; /* Number of current input streams */ 203 int inputMax; /* Max number of input streams */ 204 xmlParserInputPtr *inputTab; /* stack of inputs */ 205 206 /* Node analysis stack only used for DOM building */ 207 xmlNodePtr node; /* Current parsed Node */ 208 int nodeNr; /* Depth of the parsing stack */ 209 int nodeMax; /* Max depth of the parsing stack */ 210 xmlNodePtr *nodeTab; /* array of nodes */ 211 212 int record_info; /* Whether node info should be kept */ 213 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 214 215 int errNo; /* error code */ 216 217 int hasExternalSubset; /* reference and external subset */ 218 int hasPErefs; /* the internal subset has PE refs */ 219 int external; /* are we parsing an external entity */ 220 221 int valid; /* is the document valid */ 222 int validate; /* shall we try to validate ? */ 223 xmlValidCtxt vctxt; /* The validity context */ 224 225 xmlParserInputState instate; /* current type of input */ 226 int token; /* next char look-ahead */ 227 228 char *directory; /* the data directory */ 229 230 /* Node name stack */ 231 const xmlChar *name; /* Current parsed Node */ 232 int nameNr; /* Depth of the parsing stack */ 233 int nameMax; /* Max depth of the parsing stack */ 234 const xmlChar * *nameTab; /* array of nodes */ 235 236 long nbChars; /* unused */ 237 long checkIndex; /* used by progressive parsing lookup */ 238 int keepBlanks; /* ugly but ... */ 239 int disableSAX; /* SAX callbacks are disabled */ 240 int inSubset; /* Parsing is in int 1/ext 2 subset */ 241 const xmlChar * intSubName; /* name of subset */ 242 xmlChar * extSubURI; /* URI of external subset */ 243 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 244 245 /* xml:space values */ 246 int * space; /* Should the parser preserve spaces */ 247 int spaceNr; /* Depth of the parsing stack */ 248 int spaceMax; /* Max depth of the parsing stack */ 249 int * spaceTab; /* array of space infos */ 250 251 int depth; /* to prevent entity substitution loops */ 252 xmlParserInputPtr entity; /* used to check entities boundaries */ 253 int charset; /* encoding of the in-memory content 254 actually an xmlCharEncoding */ 255 int nodelen; /* Those two fields are there to */ 256 int nodemem; /* Speed up large node parsing */ 257 int pedantic; /* signal pedantic warnings */ 258 void *_private; /* For user data, libxml won't touch it */ 259 260 int loadsubset; /* should the external subset be loaded */ 261 int linenumbers; /* set line number in element content */ 262 void *catalogs; /* document's own catalog */ 263 int recovery; /* run in recovery mode */ 264 int progressive; /* is this a progressive parsing */ 265 xmlDictPtr dict; /* dictionary for the parser */ 266 const xmlChar * *atts; /* array for the attributes callbacks */ 267 int maxatts; /* the size of the array */ 268 int docdict; /* use strings from dict to build tree */ 269 270 /* 271 * pre-interned strings 272 */ 273 const xmlChar *str_xml; 274 const xmlChar *str_xmlns; 275 const xmlChar *str_xml_ns; 276 277 /* 278 * Everything below is used only by the new SAX mode 279 */ 280 int sax2; /* operating in the new SAX mode */ 281 int nsNr; /* the number of inherited namespaces */ 282 int nsMax; /* the size of the arrays */ 283 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 284 int *attallocs; /* which attribute were allocated */ 285 xmlStartTag *pushTab; /* array of data for push */ 286 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 287 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 288 int nsWellFormed; /* is the document XML Namespace okay */ 289 int options; /* Extra options */ 290 291 /* 292 * Those fields are needed only for streaming parsing so far 293 */ 294 int dictNames; /* Use dictionary names for the tree */ 295 int freeElemsNr; /* number of freed element nodes */ 296 xmlNodePtr freeElems; /* List of freed element nodes */ 297 int freeAttrsNr; /* number of freed attributes nodes */ 298 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 299 300 /* 301 * the complete error information for the last error. 302 */ 303 xmlError lastError; 304 xmlParserMode parseMode; /* the parser mode */ 305 unsigned long nbentities; /* number of entities references */ 306 unsigned long sizeentities; /* size of parsed entities */ 307 308 /* for use by HTML non-recursive parser */ 309 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */ 310 int nodeInfoNr; /* Depth of the parsing stack */ 311 int nodeInfoMax; /* Max depth of the parsing stack */ 312 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */ 313 314 int input_id; /* we need to label inputs */ 315 unsigned long sizeentcopy; /* volume of entity copy */ 316 }; 317 318 /** 319 * xmlSAXLocator: 320 * 321 * A SAX Locator. 322 */ 323 struct _xmlSAXLocator { 324 const xmlChar *(*getPublicId)(void *ctx); 325 const xmlChar *(*getSystemId)(void *ctx); 326 int (*getLineNumber)(void *ctx); 327 int (*getColumnNumber)(void *ctx); 328 }; 329 330 /** 331 * xmlSAXHandler: 332 * 333 * A SAX handler is bunch of callbacks called by the parser when processing 334 * of the input generate data or structure information. 335 */ 336 337 /** 338 * resolveEntitySAXFunc: 339 * @ctx: the user data (XML parser context) 340 * @publicId: The public ID of the entity 341 * @systemId: The system ID of the entity 342 * 343 * Callback: 344 * The entity loader, to control the loading of external entities, 345 * the application can either: 346 * - override this resolveEntity() callback in the SAX block 347 * - or better use the xmlSetExternalEntityLoader() function to 348 * set up it's own entity resolution routine 349 * 350 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 351 */ 352 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 353 const xmlChar *publicId, 354 const xmlChar *systemId); 355 /** 356 * internalSubsetSAXFunc: 357 * @ctx: the user data (XML parser context) 358 * @name: the root element name 359 * @ExternalID: the external ID 360 * @SystemID: the SYSTEM ID (e.g. filename or URL) 361 * 362 * Callback on internal subset declaration. 363 */ 364 typedef void (*internalSubsetSAXFunc) (void *ctx, 365 const xmlChar *name, 366 const xmlChar *ExternalID, 367 const xmlChar *SystemID); 368 /** 369 * externalSubsetSAXFunc: 370 * @ctx: the user data (XML parser context) 371 * @name: the root element name 372 * @ExternalID: the external ID 373 * @SystemID: the SYSTEM ID (e.g. filename or URL) 374 * 375 * Callback on external subset declaration. 376 */ 377 typedef void (*externalSubsetSAXFunc) (void *ctx, 378 const xmlChar *name, 379 const xmlChar *ExternalID, 380 const xmlChar *SystemID); 381 /** 382 * getEntitySAXFunc: 383 * @ctx: the user data (XML parser context) 384 * @name: The entity name 385 * 386 * Get an entity by name. 387 * 388 * Returns the xmlEntityPtr if found. 389 */ 390 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 391 const xmlChar *name); 392 /** 393 * getParameterEntitySAXFunc: 394 * @ctx: the user data (XML parser context) 395 * @name: The entity name 396 * 397 * Get a parameter entity by name. 398 * 399 * Returns the xmlEntityPtr if found. 400 */ 401 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 402 const xmlChar *name); 403 /** 404 * entityDeclSAXFunc: 405 * @ctx: the user data (XML parser context) 406 * @name: the entity name 407 * @type: the entity type 408 * @publicId: The public ID of the entity 409 * @systemId: The system ID of the entity 410 * @content: the entity value (without processing). 411 * 412 * An entity definition has been parsed. 413 */ 414 typedef void (*entityDeclSAXFunc) (void *ctx, 415 const xmlChar *name, 416 int type, 417 const xmlChar *publicId, 418 const xmlChar *systemId, 419 xmlChar *content); 420 /** 421 * notationDeclSAXFunc: 422 * @ctx: the user data (XML parser context) 423 * @name: The name of the notation 424 * @publicId: The public ID of the entity 425 * @systemId: The system ID of the entity 426 * 427 * What to do when a notation declaration has been parsed. 428 */ 429 typedef void (*notationDeclSAXFunc)(void *ctx, 430 const xmlChar *name, 431 const xmlChar *publicId, 432 const xmlChar *systemId); 433 /** 434 * attributeDeclSAXFunc: 435 * @ctx: the user data (XML parser context) 436 * @elem: the name of the element 437 * @fullname: the attribute name 438 * @type: the attribute type 439 * @def: the type of default value 440 * @defaultValue: the attribute default value 441 * @tree: the tree of enumerated value set 442 * 443 * An attribute definition has been parsed. 444 */ 445 typedef void (*attributeDeclSAXFunc)(void *ctx, 446 const xmlChar *elem, 447 const xmlChar *fullname, 448 int type, 449 int def, 450 const xmlChar *defaultValue, 451 xmlEnumerationPtr tree); 452 /** 453 * elementDeclSAXFunc: 454 * @ctx: the user data (XML parser context) 455 * @name: the element name 456 * @type: the element type 457 * @content: the element value tree 458 * 459 * An element definition has been parsed. 460 */ 461 typedef void (*elementDeclSAXFunc)(void *ctx, 462 const xmlChar *name, 463 int type, 464 xmlElementContentPtr content); 465 /** 466 * unparsedEntityDeclSAXFunc: 467 * @ctx: the user data (XML parser context) 468 * @name: The name of the entity 469 * @publicId: The public ID of the entity 470 * @systemId: The system ID of the entity 471 * @notationName: the name of the notation 472 * 473 * What to do when an unparsed entity declaration is parsed. 474 */ 475 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 476 const xmlChar *name, 477 const xmlChar *publicId, 478 const xmlChar *systemId, 479 const xmlChar *notationName); 480 /** 481 * setDocumentLocatorSAXFunc: 482 * @ctx: the user data (XML parser context) 483 * @loc: A SAX Locator 484 * 485 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 486 * Everything is available on the context, so this is useless in our case. 487 */ 488 typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 489 xmlSAXLocatorPtr loc); 490 /** 491 * startDocumentSAXFunc: 492 * @ctx: the user data (XML parser context) 493 * 494 * Called when the document start being processed. 495 */ 496 typedef void (*startDocumentSAXFunc) (void *ctx); 497 /** 498 * endDocumentSAXFunc: 499 * @ctx: the user data (XML parser context) 500 * 501 * Called when the document end has been detected. 502 */ 503 typedef void (*endDocumentSAXFunc) (void *ctx); 504 /** 505 * startElementSAXFunc: 506 * @ctx: the user data (XML parser context) 507 * @name: The element name, including namespace prefix 508 * @atts: An array of name/value attributes pairs, NULL terminated 509 * 510 * Called when an opening tag has been processed. 511 */ 512 typedef void (*startElementSAXFunc) (void *ctx, 513 const xmlChar *name, 514 const xmlChar **atts); 515 /** 516 * endElementSAXFunc: 517 * @ctx: the user data (XML parser context) 518 * @name: The element name 519 * 520 * Called when the end of an element has been detected. 521 */ 522 typedef void (*endElementSAXFunc) (void *ctx, 523 const xmlChar *name); 524 /** 525 * attributeSAXFunc: 526 * @ctx: the user data (XML parser context) 527 * @name: The attribute name, including namespace prefix 528 * @value: The attribute value 529 * 530 * Handle an attribute that has been read by the parser. 531 * The default handling is to convert the attribute into an 532 * DOM subtree and past it in a new xmlAttr element added to 533 * the element. 534 */ 535 typedef void (*attributeSAXFunc) (void *ctx, 536 const xmlChar *name, 537 const xmlChar *value); 538 /** 539 * referenceSAXFunc: 540 * @ctx: the user data (XML parser context) 541 * @name: The entity name 542 * 543 * Called when an entity reference is detected. 544 */ 545 typedef void (*referenceSAXFunc) (void *ctx, 546 const xmlChar *name); 547 /** 548 * charactersSAXFunc: 549 * @ctx: the user data (XML parser context) 550 * @ch: a xmlChar string 551 * @len: the number of xmlChar 552 * 553 * Receiving some chars from the parser. 554 */ 555 typedef void (*charactersSAXFunc) (void *ctx, 556 const xmlChar *ch, 557 int len); 558 /** 559 * ignorableWhitespaceSAXFunc: 560 * @ctx: the user data (XML parser context) 561 * @ch: a xmlChar string 562 * @len: the number of xmlChar 563 * 564 * Receiving some ignorable whitespaces from the parser. 565 * UNUSED: by default the DOM building will use characters. 566 */ 567 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 568 const xmlChar *ch, 569 int len); 570 /** 571 * processingInstructionSAXFunc: 572 * @ctx: the user data (XML parser context) 573 * @target: the target name 574 * @data: the PI data's 575 * 576 * A processing instruction has been parsed. 577 */ 578 typedef void (*processingInstructionSAXFunc) (void *ctx, 579 const xmlChar *target, 580 const xmlChar *data); 581 /** 582 * commentSAXFunc: 583 * @ctx: the user data (XML parser context) 584 * @value: the comment content 585 * 586 * A comment has been parsed. 587 */ 588 typedef void (*commentSAXFunc) (void *ctx, 589 const xmlChar *value); 590 /** 591 * cdataBlockSAXFunc: 592 * @ctx: the user data (XML parser context) 593 * @value: The pcdata content 594 * @len: the block length 595 * 596 * Called when a pcdata block has been parsed. 597 */ 598 typedef void (*cdataBlockSAXFunc) ( 599 void *ctx, 600 const xmlChar *value, 601 int len); 602 /** 603 * warningSAXFunc: 604 * @ctx: an XML parser context 605 * @msg: the message to display/transmit 606 * @...: extra parameters for the message display 607 * 608 * Display and format a warning messages, callback. 609 */ 610 typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 611 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 612 /** 613 * errorSAXFunc: 614 * @ctx: an XML parser context 615 * @msg: the message to display/transmit 616 * @...: extra parameters for the message display 617 * 618 * Display and format an error messages, callback. 619 */ 620 typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 621 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 622 /** 623 * fatalErrorSAXFunc: 624 * @ctx: an XML parser context 625 * @msg: the message to display/transmit 626 * @...: extra parameters for the message display 627 * 628 * Display and format fatal error messages, callback. 629 * Note: so far fatalError() SAX callbacks are not used, error() 630 * get all the callbacks for errors. 631 */ 632 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 633 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 634 /** 635 * isStandaloneSAXFunc: 636 * @ctx: the user data (XML parser context) 637 * 638 * Is this document tagged standalone? 639 * 640 * Returns 1 if true 641 */ 642 typedef int (*isStandaloneSAXFunc) (void *ctx); 643 /** 644 * hasInternalSubsetSAXFunc: 645 * @ctx: the user data (XML parser context) 646 * 647 * Does this document has an internal subset. 648 * 649 * Returns 1 if true 650 */ 651 typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 652 653 /** 654 * hasExternalSubsetSAXFunc: 655 * @ctx: the user data (XML parser context) 656 * 657 * Does this document has an external subset? 658 * 659 * Returns 1 if true 660 */ 661 typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 662 663 /************************************************************************ 664 * * 665 * The SAX version 2 API extensions * 666 * * 667 ************************************************************************/ 668 /** 669 * XML_SAX2_MAGIC: 670 * 671 * Special constant found in SAX2 blocks initialized fields 672 */ 673 #define XML_SAX2_MAGIC 0xDEEDBEAF 674 675 /** 676 * startElementNsSAX2Func: 677 * @ctx: the user data (XML parser context) 678 * @localname: the local name of the element 679 * @prefix: the element namespace prefix if available 680 * @URI: the element namespace name if available 681 * @nb_namespaces: number of namespace definitions on that node 682 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 683 * @nb_attributes: the number of attributes on that node 684 * @nb_defaulted: the number of defaulted attributes. The defaulted 685 * ones are at the end of the array 686 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 687 * attribute values. 688 * 689 * SAX2 callback when an element start has been detected by the parser. 690 * It provides the namespace information for the element, as well as 691 * the new namespace declarations on the element. 692 */ 693 694 typedef void (*startElementNsSAX2Func) (void *ctx, 695 const xmlChar *localname, 696 const xmlChar *prefix, 697 const xmlChar *URI, 698 int nb_namespaces, 699 const xmlChar **namespaces, 700 int nb_attributes, 701 int nb_defaulted, 702 const xmlChar **attributes); 703 704 /** 705 * endElementNsSAX2Func: 706 * @ctx: the user data (XML parser context) 707 * @localname: the local name of the element 708 * @prefix: the element namespace prefix if available 709 * @URI: the element namespace name if available 710 * 711 * SAX2 callback when an element end has been detected by the parser. 712 * It provides the namespace information for the element. 713 */ 714 715 typedef void (*endElementNsSAX2Func) (void *ctx, 716 const xmlChar *localname, 717 const xmlChar *prefix, 718 const xmlChar *URI); 719 720 721 struct _xmlSAXHandler { 722 internalSubsetSAXFunc internalSubset; 723 isStandaloneSAXFunc isStandalone; 724 hasInternalSubsetSAXFunc hasInternalSubset; 725 hasExternalSubsetSAXFunc hasExternalSubset; 726 resolveEntitySAXFunc resolveEntity; 727 getEntitySAXFunc getEntity; 728 entityDeclSAXFunc entityDecl; 729 notationDeclSAXFunc notationDecl; 730 attributeDeclSAXFunc attributeDecl; 731 elementDeclSAXFunc elementDecl; 732 unparsedEntityDeclSAXFunc unparsedEntityDecl; 733 setDocumentLocatorSAXFunc setDocumentLocator; 734 startDocumentSAXFunc startDocument; 735 endDocumentSAXFunc endDocument; 736 startElementSAXFunc startElement; 737 endElementSAXFunc endElement; 738 referenceSAXFunc reference; 739 charactersSAXFunc characters; 740 ignorableWhitespaceSAXFunc ignorableWhitespace; 741 processingInstructionSAXFunc processingInstruction; 742 commentSAXFunc comment; 743 warningSAXFunc warning; 744 errorSAXFunc error; 745 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 746 getParameterEntitySAXFunc getParameterEntity; 747 cdataBlockSAXFunc cdataBlock; 748 externalSubsetSAXFunc externalSubset; 749 unsigned int initialized; 750 /* The following fields are extensions available only on version 2 */ 751 void *_private; 752 startElementNsSAX2Func startElementNs; 753 endElementNsSAX2Func endElementNs; 754 xmlStructuredErrorFunc serror; 755 }; 756 757 /* 758 * SAX Version 1 759 */ 760 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 761 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 762 struct _xmlSAXHandlerV1 { 763 internalSubsetSAXFunc internalSubset; 764 isStandaloneSAXFunc isStandalone; 765 hasInternalSubsetSAXFunc hasInternalSubset; 766 hasExternalSubsetSAXFunc hasExternalSubset; 767 resolveEntitySAXFunc resolveEntity; 768 getEntitySAXFunc getEntity; 769 entityDeclSAXFunc entityDecl; 770 notationDeclSAXFunc notationDecl; 771 attributeDeclSAXFunc attributeDecl; 772 elementDeclSAXFunc elementDecl; 773 unparsedEntityDeclSAXFunc unparsedEntityDecl; 774 setDocumentLocatorSAXFunc setDocumentLocator; 775 startDocumentSAXFunc startDocument; 776 endDocumentSAXFunc endDocument; 777 startElementSAXFunc startElement; 778 endElementSAXFunc endElement; 779 referenceSAXFunc reference; 780 charactersSAXFunc characters; 781 ignorableWhitespaceSAXFunc ignorableWhitespace; 782 processingInstructionSAXFunc processingInstruction; 783 commentSAXFunc comment; 784 warningSAXFunc warning; 785 errorSAXFunc error; 786 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 787 getParameterEntitySAXFunc getParameterEntity; 788 cdataBlockSAXFunc cdataBlock; 789 externalSubsetSAXFunc externalSubset; 790 unsigned int initialized; 791 }; 792 793 794 /** 795 * xmlExternalEntityLoader: 796 * @URL: The System ID of the resource requested 797 * @ID: The Public ID of the resource requested 798 * @context: the XML parser context 799 * 800 * External entity loaders types. 801 * 802 * Returns the entity input parser. 803 */ 804 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 805 const char *ID, 806 xmlParserCtxtPtr context); 807 808 #ifdef __cplusplus 809 } 810 #endif 811 812 #include <libxml/encoding.h> 813 #include <libxml/xmlIO.h> 814 #include <libxml/globals.h> 815 816 #ifdef __cplusplus 817 extern "C" { 818 #endif 819 820 821 /* 822 * Init/Cleanup 823 */ 824 XMLPUBFUN void XMLCALL 825 xmlInitParser (void); 826 XMLPUBFUN void XMLCALL 827 xmlCleanupParser (void); 828 829 /* 830 * Input functions 831 */ 832 XMLPUBFUN int XMLCALL 833 xmlParserInputRead (xmlParserInputPtr in, 834 int len); 835 XMLPUBFUN int XMLCALL 836 xmlParserInputGrow (xmlParserInputPtr in, 837 int len); 838 839 /* 840 * Basic parsing Interfaces 841 */ 842 #ifdef LIBXML_SAX1_ENABLED 843 XMLPUBFUN xmlDocPtr XMLCALL 844 xmlParseDoc (const xmlChar *cur); 845 XMLPUBFUN xmlDocPtr XMLCALL 846 xmlParseFile (const char *filename); 847 XMLPUBFUN xmlDocPtr XMLCALL 848 xmlParseMemory (const char *buffer, 849 int size); 850 #endif /* LIBXML_SAX1_ENABLED */ 851 XMLPUBFUN int XMLCALL 852 xmlSubstituteEntitiesDefault(int val); 853 XMLPUBFUN int XMLCALL 854 xmlKeepBlanksDefault (int val); 855 XMLPUBFUN void XMLCALL 856 xmlStopParser (xmlParserCtxtPtr ctxt); 857 XMLPUBFUN int XMLCALL 858 xmlPedanticParserDefault(int val); 859 XMLPUBFUN int XMLCALL 860 xmlLineNumbersDefault (int val); 861 862 #ifdef LIBXML_SAX1_ENABLED 863 /* 864 * Recovery mode 865 */ 866 XMLPUBFUN xmlDocPtr XMLCALL 867 xmlRecoverDoc (const xmlChar *cur); 868 XMLPUBFUN xmlDocPtr XMLCALL 869 xmlRecoverMemory (const char *buffer, 870 int size); 871 XMLPUBFUN xmlDocPtr XMLCALL 872 xmlRecoverFile (const char *filename); 873 #endif /* LIBXML_SAX1_ENABLED */ 874 875 /* 876 * Less common routines and SAX interfaces 877 */ 878 XMLPUBFUN int XMLCALL 879 xmlParseDocument (xmlParserCtxtPtr ctxt); 880 XMLPUBFUN int XMLCALL 881 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 882 #ifdef LIBXML_SAX1_ENABLED 883 XMLPUBFUN int XMLCALL 884 xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 885 void *user_data, 886 const char *filename); 887 XMLPUBFUN int XMLCALL 888 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 889 void *user_data, 890 const char *buffer, 891 int size); 892 XMLPUBFUN xmlDocPtr XMLCALL 893 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 894 const xmlChar *cur, 895 int recovery); 896 XMLPUBFUN xmlDocPtr XMLCALL 897 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 898 const char *buffer, 899 int size, 900 int recovery); 901 XMLPUBFUN xmlDocPtr XMLCALL 902 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 903 const char *buffer, 904 int size, 905 int recovery, 906 void *data); 907 XMLPUBFUN xmlDocPtr XMLCALL 908 xmlSAXParseFile (xmlSAXHandlerPtr sax, 909 const char *filename, 910 int recovery); 911 XMLPUBFUN xmlDocPtr XMLCALL 912 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 913 const char *filename, 914 int recovery, 915 void *data); 916 XMLPUBFUN xmlDocPtr XMLCALL 917 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 918 const char *filename); 919 XMLPUBFUN xmlDocPtr XMLCALL 920 xmlParseEntity (const char *filename); 921 #endif /* LIBXML_SAX1_ENABLED */ 922 923 #ifdef LIBXML_VALID_ENABLED 924 XMLPUBFUN xmlDtdPtr XMLCALL 925 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 926 const xmlChar *ExternalID, 927 const xmlChar *SystemID); 928 XMLPUBFUN xmlDtdPtr XMLCALL 929 xmlParseDTD (const xmlChar *ExternalID, 930 const xmlChar *SystemID); 931 XMLPUBFUN xmlDtdPtr XMLCALL 932 xmlIOParseDTD (xmlSAXHandlerPtr sax, 933 xmlParserInputBufferPtr input, 934 xmlCharEncoding enc); 935 #endif /* LIBXML_VALID_ENABLE */ 936 #ifdef LIBXML_SAX1_ENABLED 937 XMLPUBFUN int XMLCALL 938 xmlParseBalancedChunkMemory(xmlDocPtr doc, 939 xmlSAXHandlerPtr sax, 940 void *user_data, 941 int depth, 942 const xmlChar *string, 943 xmlNodePtr *lst); 944 #endif /* LIBXML_SAX1_ENABLED */ 945 XMLPUBFUN xmlParserErrors XMLCALL 946 xmlParseInNodeContext (xmlNodePtr node, 947 const char *data, 948 int datalen, 949 int options, 950 xmlNodePtr *lst); 951 #ifdef LIBXML_SAX1_ENABLED 952 XMLPUBFUN int XMLCALL 953 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 954 xmlSAXHandlerPtr sax, 955 void *user_data, 956 int depth, 957 const xmlChar *string, 958 xmlNodePtr *lst, 959 int recover); 960 XMLPUBFUN int XMLCALL 961 xmlParseExternalEntity (xmlDocPtr doc, 962 xmlSAXHandlerPtr sax, 963 void *user_data, 964 int depth, 965 const xmlChar *URL, 966 const xmlChar *ID, 967 xmlNodePtr *lst); 968 #endif /* LIBXML_SAX1_ENABLED */ 969 XMLPUBFUN int XMLCALL 970 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 971 const xmlChar *URL, 972 const xmlChar *ID, 973 xmlNodePtr *lst); 974 975 /* 976 * Parser contexts handling. 977 */ 978 XMLPUBFUN xmlParserCtxtPtr XMLCALL 979 xmlNewParserCtxt (void); 980 XMLPUBFUN int XMLCALL 981 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 982 XMLPUBFUN void XMLCALL 983 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 984 XMLPUBFUN void XMLCALL 985 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 986 #ifdef LIBXML_SAX1_ENABLED 987 XMLPUBFUN void XMLCALL 988 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 989 const xmlChar* buffer, 990 const char *filename); 991 #endif /* LIBXML_SAX1_ENABLED */ 992 XMLPUBFUN xmlParserCtxtPtr XMLCALL 993 xmlCreateDocParserCtxt (const xmlChar *cur); 994 995 #ifdef LIBXML_LEGACY_ENABLED 996 /* 997 * Reading/setting optional parsing features. 998 */ 999 XMLPUBFUN int XMLCALL 1000 xmlGetFeaturesList (int *len, 1001 const char **result); 1002 XMLPUBFUN int XMLCALL 1003 xmlGetFeature (xmlParserCtxtPtr ctxt, 1004 const char *name, 1005 void *result); 1006 XMLPUBFUN int XMLCALL 1007 xmlSetFeature (xmlParserCtxtPtr ctxt, 1008 const char *name, 1009 void *value); 1010 #endif /* LIBXML_LEGACY_ENABLED */ 1011 1012 #ifdef LIBXML_PUSH_ENABLED 1013 /* 1014 * Interfaces for the Push mode. 1015 */ 1016 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1017 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1018 void *user_data, 1019 const char *chunk, 1020 int size, 1021 const char *filename); 1022 XMLPUBFUN int XMLCALL 1023 xmlParseChunk (xmlParserCtxtPtr ctxt, 1024 const char *chunk, 1025 int size, 1026 int terminate); 1027 #endif /* LIBXML_PUSH_ENABLED */ 1028 1029 /* 1030 * Special I/O mode. 1031 */ 1032 1033 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1034 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1035 void *user_data, 1036 xmlInputReadCallback ioread, 1037 xmlInputCloseCallback ioclose, 1038 void *ioctx, 1039 xmlCharEncoding enc); 1040 1041 XMLPUBFUN xmlParserInputPtr XMLCALL 1042 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1043 xmlParserInputBufferPtr input, 1044 xmlCharEncoding enc); 1045 1046 /* 1047 * Node infos. 1048 */ 1049 XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1050 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1051 const xmlNodePtr node); 1052 XMLPUBFUN void XMLCALL 1053 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1054 XMLPUBFUN void XMLCALL 1055 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1056 XMLPUBFUN unsigned long XMLCALL 1057 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1058 const xmlNodePtr node); 1059 XMLPUBFUN void XMLCALL 1060 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1061 const xmlParserNodeInfoPtr info); 1062 1063 /* 1064 * External entities handling actually implemented in xmlIO. 1065 */ 1066 1067 XMLPUBFUN void XMLCALL 1068 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1069 XMLPUBFUN xmlExternalEntityLoader XMLCALL 1070 xmlGetExternalEntityLoader(void); 1071 XMLPUBFUN xmlParserInputPtr XMLCALL 1072 xmlLoadExternalEntity (const char *URL, 1073 const char *ID, 1074 xmlParserCtxtPtr ctxt); 1075 1076 /* 1077 * Index lookup, actually implemented in the encoding module 1078 */ 1079 XMLPUBFUN long XMLCALL 1080 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1081 1082 /* 1083 * New set of simpler/more flexible APIs 1084 */ 1085 /** 1086 * xmlParserOption: 1087 * 1088 * This is the set of XML parser options that can be passed down 1089 * to the xmlReadDoc() and similar calls. 1090 */ 1091 typedef enum { 1092 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1093 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1094 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1095 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1096 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1097 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1098 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1099 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1100 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1101 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1102 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */ 1103 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1104 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */ 1105 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1106 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1107 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1108 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1109 the tree allowed afterwards (will possibly 1110 crash if you try to modify the tree) */ 1111 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 1112 XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */ 1113 XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */ 1114 XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */ 1115 XML_PARSE_IGNORE_ENC= 1<<21,/* ignore internal document encoding hint */ 1116 XML_PARSE_BIG_LINES = 1<<22 /* Store big lines numbers in text PSVI field */ 1117 } xmlParserOption; 1118 1119 XMLPUBFUN void XMLCALL 1120 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1121 XMLPUBFUN int XMLCALL 1122 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1123 const char *chunk, 1124 int size, 1125 const char *filename, 1126 const char *encoding); 1127 XMLPUBFUN int XMLCALL 1128 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1129 int options); 1130 XMLPUBFUN xmlDocPtr XMLCALL 1131 xmlReadDoc (const xmlChar *cur, 1132 const char *URL, 1133 const char *encoding, 1134 int options); 1135 XMLPUBFUN xmlDocPtr XMLCALL 1136 xmlReadFile (const char *URL, 1137 const char *encoding, 1138 int options); 1139 XMLPUBFUN xmlDocPtr XMLCALL 1140 xmlReadMemory (const char *buffer, 1141 int size, 1142 const char *URL, 1143 const char *encoding, 1144 int options); 1145 XMLPUBFUN xmlDocPtr XMLCALL 1146 xmlReadFd (int fd, 1147 const char *URL, 1148 const char *encoding, 1149 int options); 1150 XMLPUBFUN xmlDocPtr XMLCALL 1151 xmlReadIO (xmlInputReadCallback ioread, 1152 xmlInputCloseCallback ioclose, 1153 void *ioctx, 1154 const char *URL, 1155 const char *encoding, 1156 int options); 1157 XMLPUBFUN xmlDocPtr XMLCALL 1158 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1159 const xmlChar *cur, 1160 const char *URL, 1161 const char *encoding, 1162 int options); 1163 XMLPUBFUN xmlDocPtr XMLCALL 1164 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1165 const char *filename, 1166 const char *encoding, 1167 int options); 1168 XMLPUBFUN xmlDocPtr XMLCALL 1169 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1170 const char *buffer, 1171 int size, 1172 const char *URL, 1173 const char *encoding, 1174 int options); 1175 XMLPUBFUN xmlDocPtr XMLCALL 1176 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1177 int fd, 1178 const char *URL, 1179 const char *encoding, 1180 int options); 1181 XMLPUBFUN xmlDocPtr XMLCALL 1182 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1183 xmlInputReadCallback ioread, 1184 xmlInputCloseCallback ioclose, 1185 void *ioctx, 1186 const char *URL, 1187 const char *encoding, 1188 int options); 1189 1190 /* 1191 * Library wide options 1192 */ 1193 /** 1194 * xmlFeature: 1195 * 1196 * Used to examine the existence of features that can be enabled 1197 * or disabled at compile-time. 1198 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1199 */ 1200 typedef enum { 1201 XML_WITH_THREAD = 1, 1202 XML_WITH_TREE = 2, 1203 XML_WITH_OUTPUT = 3, 1204 XML_WITH_PUSH = 4, 1205 XML_WITH_READER = 5, 1206 XML_WITH_PATTERN = 6, 1207 XML_WITH_WRITER = 7, 1208 XML_WITH_SAX1 = 8, 1209 XML_WITH_FTP = 9, 1210 XML_WITH_HTTP = 10, 1211 XML_WITH_VALID = 11, 1212 XML_WITH_HTML = 12, 1213 XML_WITH_LEGACY = 13, 1214 XML_WITH_C14N = 14, 1215 XML_WITH_CATALOG = 15, 1216 XML_WITH_XPATH = 16, 1217 XML_WITH_XPTR = 17, 1218 XML_WITH_XINCLUDE = 18, 1219 XML_WITH_ICONV = 19, 1220 XML_WITH_ISO8859X = 20, 1221 XML_WITH_UNICODE = 21, 1222 XML_WITH_REGEXP = 22, 1223 XML_WITH_AUTOMATA = 23, 1224 XML_WITH_EXPR = 24, 1225 XML_WITH_SCHEMAS = 25, 1226 XML_WITH_SCHEMATRON = 26, 1227 XML_WITH_MODULES = 27, 1228 XML_WITH_DEBUG = 28, 1229 XML_WITH_DEBUG_MEM = 29, 1230 XML_WITH_DEBUG_RUN = 30, 1231 XML_WITH_ZLIB = 31, 1232 XML_WITH_ICU = 32, 1233 XML_WITH_LZMA = 33, 1234 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1235 } xmlFeature; 1236 1237 XMLPUBFUN int XMLCALL 1238 xmlHasFeature (xmlFeature feature); 1239 1240 #ifdef __cplusplus 1241 } 1242 #endif 1243 #endif /* __XML_PARSER_H__ */ 1244