/*
 * Summary: the core parser module
 * Description: Interfaces, constants and types related to the XML parser
 *
 * Copy: See Copyright for the status of this software.
 *
 * Author: Daniel Veillard
 */

#ifndef __XML_PARSER_H__
#define __XML_PARSER_H__

#include <libxml/xmlversion.h>
#include <libxml/tree.h>
#include <libxml/dict.h>
#include <libxml/hash.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlstring.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0
 */
#define XML_DEFAULT_VERSION "1.0"

/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and for
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */

/**
 * xmlParserInputDeallocate:
 * @str:  the string to deallocate
 *
 * Callback for freeing some parser input allocations.
 */
typedef void (* xmlParserInputDeallocate)(xmlChar *str);

struct _xmlParserInput {
    /* Input buffer */
    xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */

    const char *filename;             /* The file analyzed, if any */
    const char *directory;            /* the directory/base of the file */
    const xmlChar *base;              /* Base of the array to parse */
    const xmlChar *cur;               /* Current char being parsed */
    const xmlChar *end;               /* end of the array to parse */
    int length;                       /* length if known */
    int line;                         /* Current line */
    int col;                          /* Current column */
    unsigned long consumed;           /* How many xmlChars already consumed */
    xmlParserInputDeallocate free;    /* function to deallocate the base */
    const xmlChar *encoding;          /* the encoding string for entity */
    const xmlChar *version;           /* the version string for entity */
    int standalone;                   /* Was that entity marked standalone */
    int id;                           /* a unique identifier for the entity */
    unsigned long parentConsumed;     /* consumed bytes from parents */
    xmlEntityPtr entity;              /* entity, if any */
};

/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode* node;
    /*
     * Position and line number at which the text that created the node
     * begins and ends.
     */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};

/*
 * A sequence of xmlParserNodeInfo records; the parser context embeds one
 * as its node_seq field.
 */
typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
struct _xmlParserNodeInfoSeq {
    unsigned long maximum;
    unsigned long length;
    xmlParserNodeInfo* buffer;
};

/**
 * xmlParserInputState:
 *
 * The parser now also works as a state based parser.
 * The recursive one uses the state info for entities processing.
 */
typedef enum {
    XML_PARSER_EOF = -1,        /* nothing is to be parsed */
    XML_PARSER_START = 0,       /* nothing has been parsed */
    XML_PARSER_MISC,            /* Misc* before internal subset */
    XML_PARSER_PI,              /* Within a processing instruction */
    XML_PARSER_DTD,             /* within some DTD content */
    XML_PARSER_PROLOG,          /* Misc* after internal subset */
    XML_PARSER_COMMENT,         /* within a comment */
    XML_PARSER_START_TAG,       /* within a start tag */
    XML_PARSER_CONTENT,         /* within the content */
    XML_PARSER_CDATA_SECTION,   /* within a CDATA section */
    XML_PARSER_END_TAG,         /* within a closing tag */
    XML_PARSER_ENTITY_DECL,     /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,    /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,  /* within a SYSTEM value */
    XML_PARSER_EPILOG,          /* the Misc* after the last end tag */
    XML_PARSER_IGNORE,          /* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL   /* within a PUBLIC value */
} xmlParserInputState;

/**
 * XML_DETECT_IDS:
 *
 * Bit in the loadsubset context field requesting ID/REFs lookups.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_DETECT_IDS 2

/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit in the loadsubset context field requesting that the attribute
 * lists of elements be completed with the ones defaulted from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS 4

/**
 * XML_SKIP_IDS:
 *
 * Bit in the loadsubset context field requesting that ID/REFs
 * registration not be done.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS 8

/**
 * xmlParserMode:
 *
 * A parser can operate in various modes
 */
typedef enum {
    XML_PARSE_UNKNOWN = 0,
    XML_PARSE_DOM = 1,
    XML_PARSE_SAX = 2,
    XML_PARSE_PUSH_DOM = 3,
    XML_PARSE_PUSH_SAX = 4,
    XML_PARSE_READER = 5
} xmlParserMode;

/* Opaque here: only pointers to it are used in this header (see pushTab). */
typedef struct _xmlStartTag xmlStartTag;

/**
 * xmlParserCtxt:
 *
 * The parser context.
 * NOTE This doesn't completely define the parser state, the (current ?)
 * design of the parser uses recursive function calls since this allows
 * an easy mapping from the production rules of the specification
 * to the actual code. The drawback is that the actual function calls
 * also reflect the parser state. However most of the parsing routines
 * take as the only argument the parser context pointer, so migrating
 * to a state based parser for progressive parsing shouldn't be too hard.
 */
struct _xmlParserCtxt {
    struct _xmlSAXHandler *sax;       /* The SAX handler */
    void *userData;                   /* For SAX interface only, used by DOM build */
    xmlDocPtr myDoc;                  /* the document being built */
    int wellFormed;                   /* is the document well formed */
    int replaceEntities;              /* shall we replace entities ? */
    const xmlChar *version;           /* the XML version string */
    const xmlChar *encoding;          /* the declared encoding, if any */
    int standalone;                   /* standalone document */
    int html;                         /* an HTML(1) document
                                       * 3 is HTML after <head>
                                       * 10 is HTML after <body>
                                       */

    /* Input stream stack */
    xmlParserInputPtr input;          /* Current input stream */
    int inputNr;                      /* Number of current input streams */
    int inputMax;                     /* Max number of input streams */
    xmlParserInputPtr *inputTab;      /* stack of inputs */

    /* Node analysis stack only used for DOM building */
    xmlNodePtr node;                  /* Current parsed Node */
    int nodeNr;                       /* Depth of the parsing stack */
    int nodeMax;                      /* Max depth of the parsing stack */
    xmlNodePtr *nodeTab;              /* array of nodes */

    int record_info;                  /* Whether node info should be kept */
    xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */

    int errNo;                        /* error code */

    int hasExternalSubset;            /* references an external subset */
    int hasPErefs;                    /* the internal subset has PE refs */
    int external;                     /* are we parsing an external entity */

    int valid;                        /* is the document valid */
    int validate;                     /* shall we try to validate ? */
    xmlValidCtxt vctxt;               /* The validity context */

    xmlParserInputState instate;      /* current type of input */
    int token;                        /* next char look-ahead */

    char *directory;                  /* the data directory */

    /* Node name stack */
    const xmlChar *name;              /* Name of the current parsed node */
    int nameNr;                       /* Depth of the parsing stack */
    int nameMax;                      /* Max depth of the parsing stack */
    const xmlChar * *nameTab;         /* array of node names */

    long nbChars;                     /* unused */
    long checkIndex;                  /* used by progressive parsing lookup */
    int keepBlanks;                   /* ugly but ... */
    int disableSAX;                   /* SAX callbacks are disabled */
    int inSubset;                     /* Parsing is in int 1/ext 2 subset */
    const xmlChar * intSubName;       /* name of subset */
    xmlChar * extSubURI;              /* URI of external subset */
    xmlChar * extSubSystem;           /* SYSTEM ID of external subset */

    /* xml:space values */
    int * space;                      /* Should the parser preserve spaces */
    int spaceNr;                      /* Depth of the parsing stack */
    int spaceMax;                     /* Max depth of the parsing stack */
    int * spaceTab;                   /* array of space infos */

    int depth;                        /* to prevent entity substitution loops */
    xmlParserInputPtr entity;         /* used to check entities boundaries */
    int charset;                      /* encoding of the in-memory content,
                                         actually an xmlCharEncoding */
    int nodelen;                      /* Those two fields are there to */
    int nodemem;                      /* speed up large node parsing */
    int pedantic;                     /* signal pedantic warnings */
    void *_private;                   /* For user data, libxml won't touch it */

    int loadsubset;                   /* should the external subset be loaded */
    int linenumbers;                  /* set line number in element content */
    void *catalogs;                   /* document's own catalog */
    int recovery;                     /* run in recovery mode */
    int progressive;                  /* is this a progressive parsing */
    xmlDictPtr dict;                  /* dictionary for the parser */
    const xmlChar * *atts;            /* array for the attributes callbacks */
    int maxatts;                      /* the size of the array */
    int docdict;                      /* use strings from dict to build tree */

    /*
     * pre-interned strings
     */
    const xmlChar *str_xml;
    const xmlChar *str_xmlns;
    const xmlChar *str_xml_ns;

    /*
     * Everything below is used only by the new SAX mode
     */
    int sax2;                         /* operating in the new SAX mode */
    int nsNr;                         /* the number of inherited namespaces */
    int nsMax;                        /* the size of the arrays */
    const xmlChar * *nsTab;           /* the array of prefix/namespace name */
    int *attallocs;                   /* which attributes were allocated */
    xmlStartTag *pushTab;             /* array of data for push */
    xmlHashTablePtr attsDefault;      /* defaulted attributes if any */
    xmlHashTablePtr attsSpecial;      /* non-CDATA attributes if any */
    int nsWellFormed;                 /* is the document XML Namespace okay */
    int options;                      /* Extra options */

    /*
     * Those fields are needed only for streaming parsing so far
     */
    int dictNames;                    /* Use dictionary names for the tree */
    int freeElemsNr;                  /* number of freed element nodes */
    xmlNodePtr freeElems;             /* List of freed element nodes */
    int freeAttrsNr;                  /* number of freed attribute nodes */
    xmlAttrPtr freeAttrs;             /* List of freed attribute nodes */

    /*
     * the complete error information for the last error.
     */
    xmlError lastError;
    xmlParserMode parseMode;          /* the parser mode */
    unsigned long nbentities;         /* unused */
    unsigned long sizeentities;       /* size of parsed entities */

    /* for use by HTML non-recursive parser */
    xmlParserNodeInfo *nodeInfo;      /* Current NodeInfo */
    int nodeInfoNr;                   /* Depth of the parsing stack */
    int nodeInfoMax;                  /* Max depth of the parsing stack */
    xmlParserNodeInfo *nodeInfoTab;   /* array of nodeInfos */

    int input_id;                     /* we need to label inputs */
    unsigned long sizeentcopy;        /* volume of entity copy */

    int endCheckState;                /* quote state for push parser */
    unsigned short nbErrors;          /* number of errors */
    unsigned short nbWarnings;        /* number of warnings */
};

/**
 * xmlSAXLocator:
 *
 * A SAX Locator.
 */
struct _xmlSAXLocator {
    const xmlChar *(*getPublicId)(void *ctx);
    const xmlChar *(*getSystemId)(void *ctx);
    int (*getLineNumber)(void *ctx);
    int (*getColumnNumber)(void *ctx);
};

/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when
 * processing of the input generates data or structure information.
 */

/**
 * resolveEntitySAXFunc:
 * @ctx:  the user data (XML parser context)
 * @publicId: The public ID of the entity
 * @systemId: The system ID of the entity
 *
 * Callback:
 * The entity loader, to control the loading of external entities,
 * the application can either:
 *    - override this resolveEntity() callback in the SAX block
 *    - or better use the xmlSetExternalEntityLoader() function to
 *      set up its own entity resolution routine
 *
 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
 */
typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
                                const xmlChar *publicId,
                                const xmlChar *systemId);
/**
 * internalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  the root element name
 * @ExternalID:  the external ID
 * @SystemID:  the SYSTEM ID (e.g. filename or URL)
 *
 * Callback on internal subset declaration.
 */
typedef void (*internalSubsetSAXFunc) (void *ctx,
                                const xmlChar *name,
                                const xmlChar *ExternalID,
                                const xmlChar *SystemID);
/**
 * externalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  the root element name
 * @ExternalID:  the external ID
 * @SystemID:  the SYSTEM ID (e.g. filename or URL)
 *
 * Callback on external subset declaration.
 */
typedef void (*externalSubsetSAXFunc) (void *ctx,
                                const xmlChar *name,
                                const xmlChar *ExternalID,
                                const xmlChar *SystemID);
/**
 * getEntitySAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name: The entity name
 *
 * Get an entity by name.
 *
 * Returns the xmlEntityPtr if found.
 */
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
                                const xmlChar *name);
/**
 * getParameterEntitySAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name: The entity name
 *
 * Get a parameter entity by name.
 *
 * Returns the xmlEntityPtr if found.
 */
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
                                const xmlChar *name);
/**
 * entityDeclSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  the entity name
 * @type:  the entity type
 * @publicId: The public ID of the entity
 * @systemId: The system ID of the entity
 * @content: the entity value (without processing).
 *
 * An entity definition has been parsed.
 */
typedef void (*entityDeclSAXFunc) (void *ctx,
                                const xmlChar *name,
                                int type,
                                const xmlChar *publicId,
                                const xmlChar *systemId,
                                xmlChar *content);
/**
 * notationDeclSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name: The name of the notation
 * @publicId: The public ID of the entity
 * @systemId: The system ID of the entity
 *
 * What to do when a notation declaration has been parsed.
 */
typedef void (*notationDeclSAXFunc)(void *ctx,
                                const xmlChar *name,
                                const xmlChar *publicId,
                                const xmlChar *systemId);
/**
 * attributeDeclSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @elem:  the name of the element
 * @fullname:  the attribute name
 * @type:  the attribute type
 * @def:  the type of default value
 * @defaultValue: the attribute default value
 * @tree:  the tree of enumerated value set
 *
 * An attribute definition has been parsed.
 */
typedef void (*attributeDeclSAXFunc)(void *ctx,
                                const xmlChar *elem,
                                const xmlChar *fullname,
                                int type,
                                int def,
                                const xmlChar *defaultValue,
                                xmlEnumerationPtr tree);
/**
 * elementDeclSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  the element name
 * @type:  the element type
 * @content: the element value tree
 *
 * An element definition has been parsed.
 */
typedef void (*elementDeclSAXFunc)(void *ctx,
                                const xmlChar *name,
                                int type,
                                xmlElementContentPtr content);
/**
 * unparsedEntityDeclSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name: The name of the entity
 * @publicId: The public ID of the entity
 * @systemId: The system ID of the entity
 * @notationName: the name of the notation
 *
 * What to do when an unparsed entity declaration is parsed.
 */
typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
                                const xmlChar *name,
                                const xmlChar *publicId,
                                const xmlChar *systemId,
                                const xmlChar *notationName);
/**
 * setDocumentLocatorSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @loc: A SAX Locator
 *
 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
 * Everything is available on the context, so this is useless in our case.
 */
typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
                                xmlSAXLocatorPtr loc);
/**
 * startDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document starts being processed.
 */
typedef void (*startDocumentSAXFunc) (void *ctx);
/**
 * endDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document end has been detected.
 */
typedef void (*endDocumentSAXFunc) (void *ctx);
/**
 * startElementSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  The element name, including namespace prefix
 * @atts:  An array of name/value attribute pairs, NULL terminated
 *
 * Called when an opening tag has been processed.
 */
typedef void (*startElementSAXFunc) (void *ctx,
                                const xmlChar *name,
                                const xmlChar **atts);
/**
 * endElementSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  The element name
 *
 * Called when the end of an element has been detected.
 */
typedef void (*endElementSAXFunc) (void *ctx,
                                const xmlChar *name);
/**
 * attributeSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  The attribute name, including namespace prefix
 * @value:  The attribute value
 *
 * Handle an attribute that has been read by the parser.
 * The default handling is to convert the attribute into a
 * DOM subtree and paste it in a new xmlAttr element added to
 * the element.
 */
typedef void (*attributeSAXFunc) (void *ctx,
                                const xmlChar *name,
                                const xmlChar *value);
/**
 * referenceSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @name:  The entity name
 *
 * Called when an entity reference is detected.
 */
typedef void (*referenceSAXFunc) (void *ctx,
                                const xmlChar *name);
/**
 * charactersSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @ch:  a xmlChar string
 * @len: the number of xmlChar
 *
 * Receiving some chars from the parser.
 */
typedef void (*charactersSAXFunc) (void *ctx,
                                const xmlChar *ch,
                                int len);
/**
 * ignorableWhitespaceSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @ch:  a xmlChar string
 * @len: the number of xmlChar
 *
 * Receiving some ignorable whitespace from the parser.
 * UNUSED: by default the DOM building will use characters.
 */
typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
                                const xmlChar *ch,
                                int len);
/**
 * processingInstructionSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @target:  the target name
 * @data: the PI data
 *
 * A processing instruction has been parsed.
 */
typedef void (*processingInstructionSAXFunc) (void *ctx,
                                const xmlChar *target,
                                const xmlChar *data);
/**
 * commentSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @value:  the comment content
 *
 * A comment has been parsed.
 */
typedef void (*commentSAXFunc) (void *ctx,
                                const xmlChar *value);
/**
 * cdataBlockSAXFunc:
 * @ctx:  the user data (XML parser context)
 * @value:  The pcdata content
 * @len:  the block length
 *
 * Called when a pcdata block has been parsed.
 * NOTE(review): given the callback name, "pcdata" here presumably refers
 * to a CDATA section - confirm against the parser implementation.
 */
typedef void (*cdataBlockSAXFunc) (
                                void *ctx,
                                const xmlChar *value,
                                int len);
/**
 * warningSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format a warning message, callback.
 */
typedef void (*warningSAXFunc) (void *ctx,
                                const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
/**
 * errorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format an error message, callback.
 */
typedef void (*errorSAXFunc) (void *ctx,
                                const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
/**
 * fatalErrorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format a fatal error message, callback.
 * Note: so far fatalError() SAX callbacks are not used, error()
 *       gets all the callbacks for errors.
 */
typedef void (*fatalErrorSAXFunc) (void *ctx,
                                const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
/**
 * isStandaloneSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Is this document tagged standalone?
 *
 * Returns 1 if true
 */
typedef int (*isStandaloneSAXFunc) (void *ctx);
/**
 * hasInternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an internal subset?
 *
 * Returns 1 if true
 */
typedef int (*hasInternalSubsetSAXFunc) (void *ctx);

/**
 * hasExternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an external subset?
 *
 * Returns 1 if true
 */
typedef int (*hasExternalSubsetSAXFunc) (void *ctx);

/************************************************************************
 *                                                                      *
 *                  The SAX version 2 API extensions                    *
 *                                                                      *
 ************************************************************************/
/**
 * XML_SAX2_MAGIC:
 *
 * Special constant found in the initialized field of SAX2 blocks
 */
#define XML_SAX2_MAGIC 0xDEEDBEAF

/**
 * startElementNsSAX2Func:
 * @ctx:  the user data (XML parser context)
 * @localname:  the local name of the element
 * @prefix:  the element namespace prefix if available
 * @URI:  the element namespace name if available
 * @nb_namespaces:  number of namespace definitions on that node
 * @namespaces:  pointer to the array of prefix/URI pairs namespace definitions
 * @nb_attributes:  the number of attributes on that node
 * @nb_defaulted:  the number of defaulted attributes. The defaulted
 *                 ones are at the end of the array
 * @attributes:  pointer to the array of (localname/prefix/URI/value/end)
 *               attribute values.
 *
 * SAX2 callback when an element start has been detected by the parser.
 * It provides the namespace information for the element, as well as
 * the new namespace declarations on the element.
 */

typedef void (*startElementNsSAX2Func) (void *ctx,
                                        const xmlChar *localname,
                                        const xmlChar *prefix,
                                        const xmlChar *URI,
                                        int nb_namespaces,
                                        const xmlChar **namespaces,
                                        int nb_attributes,
                                        int nb_defaulted,
                                        const xmlChar **attributes);

/**
 * endElementNsSAX2Func:
 * @ctx:  the user data (XML parser context)
 * @localname:  the local name of the element
 * @prefix:  the element namespace prefix if available
 * @URI:  the element namespace name if available
 *
 * SAX2 callback when an element end has been detected by the parser.
 * It provides the namespace information for the element.
 */

typedef void (*endElementNsSAX2Func)   (void *ctx,
                                        const xmlChar *localname,
                                        const xmlChar *prefix,
                                        const xmlChar *URI);


/*
 * The SAX callback table. The fields up to and including `initialized`
 * are laid out exactly as in struct _xmlSAXHandlerV1 below; the fields
 * after it are the version 2 extensions.
 */
struct _xmlSAXHandler {
    internalSubsetSAXFunc internalSubset;
    isStandaloneSAXFunc isStandalone;
    hasInternalSubsetSAXFunc hasInternalSubset;
    hasExternalSubsetSAXFunc hasExternalSubset;
    resolveEntitySAXFunc resolveEntity;
    getEntitySAXFunc getEntity;
    entityDeclSAXFunc entityDecl;
    notationDeclSAXFunc notationDecl;
    attributeDeclSAXFunc attributeDecl;
    elementDeclSAXFunc elementDecl;
    unparsedEntityDeclSAXFunc unparsedEntityDecl;
    setDocumentLocatorSAXFunc setDocumentLocator;
    startDocumentSAXFunc startDocument;
    endDocumentSAXFunc endDocument;
    startElementSAXFunc startElement;
    endElementSAXFunc endElement;
    referenceSAXFunc reference;
    charactersSAXFunc characters;
    ignorableWhitespaceSAXFunc ignorableWhitespace;
    processingInstructionSAXFunc processingInstruction;
    commentSAXFunc comment;
    warningSAXFunc warning;
    errorSAXFunc error;
    fatalErrorSAXFunc fatalError; /* unused, error() gets all the errors */
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
    externalSubsetSAXFunc externalSubset;
    unsigned int initialized;     /* see XML_SAX2_MAGIC */
    /* The following fields are extensions available only on version 2 */
    void *_private;
    startElementNsSAX2Func startElementNs;
    endElementNsSAX2Func endElementNs;
    xmlStructuredErrorFunc serror;
};

/*
 * SAX Version 1
 *
 * Same callback fields as struct _xmlSAXHandler up to and including
 * `initialized`, without the version 2 extension fields.
 */
typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
struct _xmlSAXHandlerV1 {
    internalSubsetSAXFunc internalSubset;
    isStandaloneSAXFunc isStandalone;
    hasInternalSubsetSAXFunc hasInternalSubset;
    hasExternalSubsetSAXFunc hasExternalSubset;
    resolveEntitySAXFunc resolveEntity;
    getEntitySAXFunc getEntity;
    entityDeclSAXFunc entityDecl;
    notationDeclSAXFunc notationDecl;
    attributeDeclSAXFunc attributeDecl;
    elementDeclSAXFunc elementDecl;
    unparsedEntityDeclSAXFunc unparsedEntityDecl;
    setDocumentLocatorSAXFunc setDocumentLocator;
    startDocumentSAXFunc startDocument;
    endDocumentSAXFunc endDocument;
    startElementSAXFunc startElement;
    endElementSAXFunc endElement;
    referenceSAXFunc reference;
    charactersSAXFunc characters;
    ignorableWhitespaceSAXFunc ignorableWhitespace;
    processingInstructionSAXFunc processingInstruction;
    commentSAXFunc comment;
    warningSAXFunc warning;
    errorSAXFunc error;
    fatalErrorSAXFunc fatalError; /* unused, error() gets all the errors */
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
    externalSubsetSAXFunc externalSubset;
    unsigned int initialized;
};


/**
 * xmlExternalEntityLoader:
 * @URL: The System ID of the resource requested
 * @ID: The Public ID of the resource requested
 * @context: the XML parser context
 *
 * External entity loaders types.
 *
 * Returns the entity input parser.
 */
typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
                                         const char *ID,
                                         xmlParserCtxtPtr context);

#ifdef __cplusplus
}
#endif

#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/globals.h>

#ifdef __cplusplus
extern "C" {
#endif


/*
 * Init/Cleanup
 */
XMLPUBFUN void
                xmlInitParser           (void);
XMLPUBFUN void
                xmlCleanupParser        (void);

/*
 * Input functions
 */
XML_DEPRECATED
XMLPUBFUN int
                xmlParserInputRead      (xmlParserInputPtr in,
                                         int len);
XML_DEPRECATED
XMLPUBFUN int
                xmlParserInputGrow      (xmlParserInputPtr in,
                                         int len);

/*
 * Basic parsing Interfaces
 */
#ifdef LIBXML_SAX1_ENABLED
XMLPUBFUN xmlDocPtr
                xmlParseDoc             (const xmlChar *cur);
XMLPUBFUN xmlDocPtr
                xmlParseFile            (const char *filename);
XMLPUBFUN xmlDocPtr
                xmlParseMemory          (const char *buffer,
                                         int size);
#endif /* LIBXML_SAX1_ENABLED */
XMLPUBFUN int
                xmlSubstituteEntitiesDefault(int val);
XMLPUBFUN int
                xmlKeepBlanksDefault    (int val);
XMLPUBFUN void
                xmlStopParser           (xmlParserCtxtPtr ctxt);
XMLPUBFUN int
                xmlPedanticParserDefault(int val);
XMLPUBFUN int
                xmlLineNumbersDefault   (int val);

#ifdef LIBXML_SAX1_ENABLED
/*
 * Recovery mode
 */
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlRecoverDoc           (const xmlChar *cur);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlRecoverMemory        (const char *buffer,
                                         int size);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlRecoverFile          (const char *filename);
#endif /* LIBXML_SAX1_ENABLED */

/*
 * Less common routines and SAX interfaces
 */
XMLPUBFUN int
                xmlParseDocument        (xmlParserCtxtPtr ctxt);
XMLPUBFUN int
                xmlParseExtParsedEnt    (xmlParserCtxtPtr ctxt);
#ifdef LIBXML_SAX1_ENABLED
XML_DEPRECATED
XMLPUBFUN int
                xmlSAXUserParseFile     (xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         const char *filename);
XML_DEPRECATED
XMLPUBFUN int
                xmlSAXUserParseMemory   (xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         const char *buffer,
                                         int size);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseDoc          (xmlSAXHandlerPtr sax,
                                         const xmlChar *cur,
                                         int recovery);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseMemory       (xmlSAXHandlerPtr sax,
                                         const char *buffer,
                                         int size,
                                         int recovery);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
                                         const char *buffer,
                                         int size,
                                         int recovery,
                                         void *data);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseFile         (xmlSAXHandlerPtr sax,
                                         const char *filename,
                                         int recovery);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
                                         const char *filename,
                                         int recovery,
                                         void *data);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlSAXParseEntity       (xmlSAXHandlerPtr sax,
                                         const char *filename);
XML_DEPRECATED
XMLPUBFUN xmlDocPtr
                xmlParseEntity          (const char *filename);
#endif /* LIBXML_SAX1_ENABLED */

#ifdef LIBXML_VALID_ENABLED
XML_DEPRECATED
XMLPUBFUN xmlDtdPtr
                xmlSAXParseDTD          (xmlSAXHandlerPtr sax,
                                         const xmlChar *ExternalID,
                                         const xmlChar *SystemID);
XMLPUBFUN xmlDtdPtr
                xmlParseDTD             (const xmlChar *ExternalID,
                                         const xmlChar *SystemID);
XMLPUBFUN xmlDtdPtr
                xmlIOParseDTD           (xmlSAXHandlerPtr sax,
                                         xmlParserInputBufferPtr input,
                                         xmlCharEncoding enc);
#endif /* LIBXML_VALID_ENABLED */
#ifdef LIBXML_SAX1_ENABLED
XMLPUBFUN int
                xmlParseBalancedChunkMemory(xmlDocPtr doc,
                                         xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         int depth,
                                         const xmlChar *string,
                                         xmlNodePtr *lst);
#endif /* LIBXML_SAX1_ENABLED */
XMLPUBFUN xmlParserErrors
                xmlParseInNodeContext   (xmlNodePtr node,
                                         const char *data,
                                         int datalen,
                                         int options,
                                         xmlNodePtr *lst);
/* The next two prototypes are only declared when SAX1 support is enabled. */
#ifdef LIBXML_SAX1_ENABLED
XMLPUBFUN int
                xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
                                         xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         int depth,
                                         const xmlChar *string,
                                         xmlNodePtr *lst,
                                         int recover);
XML_DEPRECATED
XMLPUBFUN int
                xmlParseExternalEntity  (xmlDocPtr doc,
                                         xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         int depth,
                                         const xmlChar *URL,
                                         const xmlChar *ID,
                                         xmlNodePtr *lst);
#endif /* LIBXML_SAX1_ENABLED */
XMLPUBFUN int
                xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
                                         const xmlChar *URL,
                                         const xmlChar *ID,
                                         xmlNodePtr *lst);

/*
 * Parser contexts handling.
 */
XMLPUBFUN xmlParserCtxtPtr
                xmlNewParserCtxt        (void);
XMLPUBFUN xmlParserCtxtPtr
                xmlNewSAXParserCtxt     (const xmlSAXHandler *sax,
                                         void *userData);
XMLPUBFUN int
                xmlInitParserCtxt       (xmlParserCtxtPtr ctxt);
XMLPUBFUN void
                xmlClearParserCtxt      (xmlParserCtxtPtr ctxt);
XMLPUBFUN void
                xmlFreeParserCtxt       (xmlParserCtxtPtr ctxt);
#ifdef LIBXML_SAX1_ENABLED
XML_DEPRECATED
XMLPUBFUN void
                xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
                                         const xmlChar* buffer,
                                         const char *filename);
#endif /* LIBXML_SAX1_ENABLED */
XMLPUBFUN xmlParserCtxtPtr
                xmlCreateDocParserCtxt  (const xmlChar *cur);

#ifdef LIBXML_LEGACY_ENABLED
/*
 * Reading/setting optional parsing features.
 */
XML_DEPRECATED
XMLPUBFUN int
                xmlGetFeaturesList      (int *len,
                                         const char **result);
XML_DEPRECATED
XMLPUBFUN int
                xmlGetFeature           (xmlParserCtxtPtr ctxt,
                                         const char *name,
                                         void *result);
XML_DEPRECATED
XMLPUBFUN int
                xmlSetFeature           (xmlParserCtxtPtr ctxt,
                                         const char *name,
                                         void *value);
#endif /* LIBXML_LEGACY_ENABLED */

#ifdef LIBXML_PUSH_ENABLED
/*
 * Interfaces for the Push mode.
 */
XMLPUBFUN xmlParserCtxtPtr
                xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         const char *chunk,
                                         int size,
                                         const char *filename);
XMLPUBFUN int
                xmlParseChunk           (xmlParserCtxtPtr ctxt,
                                         const char *chunk,
                                         int size,
                                         int terminate);
#endif /* LIBXML_PUSH_ENABLED */

/*
 * Special I/O mode.
 */

XMLPUBFUN xmlParserCtxtPtr
                xmlCreateIOParserCtxt   (xmlSAXHandlerPtr sax,
                                         void *user_data,
                                         xmlInputReadCallback ioread,
                                         xmlInputCloseCallback ioclose,
                                         void *ioctx,
                                         xmlCharEncoding enc);

XMLPUBFUN xmlParserInputPtr
                xmlNewIOInputStream     (xmlParserCtxtPtr ctxt,
                                         xmlParserInputBufferPtr input,
                                         xmlCharEncoding enc);

/*
 * Node infos.
 */
XMLPUBFUN const xmlParserNodeInfo*
                xmlParserFindNodeInfo   (const xmlParserCtxtPtr ctxt,
                                         const xmlNodePtr node);
XMLPUBFUN void
                xmlInitNodeInfoSeq      (xmlParserNodeInfoSeqPtr seq);
XMLPUBFUN void
                xmlClearNodeInfoSeq     (xmlParserNodeInfoSeqPtr seq);
XMLPUBFUN unsigned long
                xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
                                         const xmlNodePtr node);
XMLPUBFUN void
                xmlParserAddNodeInfo    (xmlParserCtxtPtr ctxt,
                                         const xmlParserNodeInfoPtr info);

/*
 * External entities handling actually implemented in xmlIO.
1087 */ 1088 1089 XMLPUBFUN void 1090 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1091 XMLPUBFUN xmlExternalEntityLoader 1092 xmlGetExternalEntityLoader(void); 1093 XMLPUBFUN xmlParserInputPtr 1094 xmlLoadExternalEntity (const char *URL, 1095 const char *ID, 1096 xmlParserCtxtPtr ctxt); 1097 1098 /* 1099 * Index lookup, actually implemented in the encoding module 1100 */ 1101 XMLPUBFUN long 1102 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1103 1104 /* 1105 * New set of simpler/more flexible APIs 1106 */ 1107 /** 1108 * xmlParserOption: 1109 * 1110 * This is the set of XML parser options that can be passed down 1111 * to the xmlReadDoc() and similar calls. 1112 */ 1113 typedef enum { 1114 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1115 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1116 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1117 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1118 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1119 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1120 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1121 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1122 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1123 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1124 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */ 1125 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1126 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */ 1127 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1128 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1129 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1130 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1131 the tree allowed afterwards (will possibly 1132 crash if you try to modify the tree) */ 1133 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 1134 XML_PARSE_NOBASEFIX = 
1<<18,/* do not fixup XINCLUDE xml:base uris */ 1135 XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */ 1136 XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */ 1137 XML_PARSE_IGNORE_ENC= 1<<21,/* ignore internal document encoding hint */ 1138 XML_PARSE_BIG_LINES = 1<<22 /* Store big lines numbers in text PSVI field */ 1139 } xmlParserOption; 1140 1141 XMLPUBFUN void 1142 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1143 XMLPUBFUN int 1144 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1145 const char *chunk, 1146 int size, 1147 const char *filename, 1148 const char *encoding); 1149 XMLPUBFUN int 1150 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1151 int options); 1152 XMLPUBFUN xmlDocPtr 1153 xmlReadDoc (const xmlChar *cur, 1154 const char *URL, 1155 const char *encoding, 1156 int options); 1157 XMLPUBFUN xmlDocPtr 1158 xmlReadFile (const char *URL, 1159 const char *encoding, 1160 int options); 1161 XMLPUBFUN xmlDocPtr 1162 xmlReadMemory (const char *buffer, 1163 int size, 1164 const char *URL, 1165 const char *encoding, 1166 int options); 1167 XMLPUBFUN xmlDocPtr 1168 xmlReadFd (int fd, 1169 const char *URL, 1170 const char *encoding, 1171 int options); 1172 XMLPUBFUN xmlDocPtr 1173 xmlReadIO (xmlInputReadCallback ioread, 1174 xmlInputCloseCallback ioclose, 1175 void *ioctx, 1176 const char *URL, 1177 const char *encoding, 1178 int options); 1179 XMLPUBFUN xmlDocPtr 1180 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1181 const xmlChar *cur, 1182 const char *URL, 1183 const char *encoding, 1184 int options); 1185 XMLPUBFUN xmlDocPtr 1186 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1187 const char *filename, 1188 const char *encoding, 1189 int options); 1190 XMLPUBFUN xmlDocPtr 1191 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1192 const char *buffer, 1193 int size, 1194 const char *URL, 1195 const char *encoding, 1196 int options); 1197 XMLPUBFUN xmlDocPtr 1198 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1199 int fd, 1200 const char *URL, 1201 const char 
*encoding, 1202 int options); 1203 XMLPUBFUN xmlDocPtr 1204 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1205 xmlInputReadCallback ioread, 1206 xmlInputCloseCallback ioclose, 1207 void *ioctx, 1208 const char *URL, 1209 const char *encoding, 1210 int options); 1211 1212 /* 1213 * Library wide options 1214 */ 1215 /** 1216 * xmlFeature: 1217 * 1218 * Used to examine the existence of features that can be enabled 1219 * or disabled at compile-time. 1220 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1221 */ 1222 typedef enum { 1223 XML_WITH_THREAD = 1, 1224 XML_WITH_TREE = 2, 1225 XML_WITH_OUTPUT = 3, 1226 XML_WITH_PUSH = 4, 1227 XML_WITH_READER = 5, 1228 XML_WITH_PATTERN = 6, 1229 XML_WITH_WRITER = 7, 1230 XML_WITH_SAX1 = 8, 1231 XML_WITH_FTP = 9, 1232 XML_WITH_HTTP = 10, 1233 XML_WITH_VALID = 11, 1234 XML_WITH_HTML = 12, 1235 XML_WITH_LEGACY = 13, 1236 XML_WITH_C14N = 14, 1237 XML_WITH_CATALOG = 15, 1238 XML_WITH_XPATH = 16, 1239 XML_WITH_XPTR = 17, 1240 XML_WITH_XINCLUDE = 18, 1241 XML_WITH_ICONV = 19, 1242 XML_WITH_ISO8859X = 20, 1243 XML_WITH_UNICODE = 21, 1244 XML_WITH_REGEXP = 22, 1245 XML_WITH_AUTOMATA = 23, 1246 XML_WITH_EXPR = 24, 1247 XML_WITH_SCHEMAS = 25, 1248 XML_WITH_SCHEMATRON = 26, 1249 XML_WITH_MODULES = 27, 1250 XML_WITH_DEBUG = 28, 1251 XML_WITH_DEBUG_MEM = 29, 1252 XML_WITH_DEBUG_RUN = 30, 1253 XML_WITH_ZLIB = 31, 1254 XML_WITH_ICU = 32, 1255 XML_WITH_LZMA = 33, 1256 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1257 } xmlFeature; 1258 1259 XMLPUBFUN int 1260 xmlHasFeature (xmlFeature feature); 1261 1262 #ifdef __cplusplus 1263 } 1264 #endif 1265 #endif /* __XML_PARSER_H__ */ 1266