1 /* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10 #ifndef __XML_PARSER_H__ 11 #define __XML_PARSER_H__ 12 13 #include <stdarg.h> 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/tree.h> 17 #include <libxml/dict.h> 18 #include <libxml/hash.h> 19 #include <libxml/valid.h> 20 #include <libxml/entities.h> 21 #include <libxml/xmlerror.h> 22 #include <libxml/xmlstring.h> 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 /** 29 * XML_DEFAULT_VERSION: 30 * 31 * The default version of XML used: 1.0 32 */ 33 #define XML_DEFAULT_VERSION "1.0" 34 35 /** 36 * xmlParserInput: 37 * 38 * An xmlParserInput is an input flow for the XML processor. 39 * Each entity parsed is associated an xmlParserInput (except the 40 * few predefined ones). This is the case both for internal entities 41 * - in which case the flow is already completely in memory - or 42 * external entities - in which case we use the buf structure for 43 * progressive reading and I18N conversions to the internal UTF-8 format. 44 */ 45 46 /** 47 * xmlParserInputDeallocate: 48 * @str: the string to deallocate 49 * 50 * Callback for freeing some parser input allocations. 51 */ 52 typedef void (* xmlParserInputDeallocate)(xmlChar *str); 53 54 struct _xmlParserInput { 55 /* Input buffer */ 56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 57 58 const char *filename; /* The file analyzed, if any */ 59 const char *directory; /* the directory/base of the file */ 60 const xmlChar *base; /* Base of the array to parse */ 61 const xmlChar *cur; /* Current char being parsed */ 62 const xmlChar *end; /* end of the array to parse */ 63 int length; /* length if known */ 64 int line; /* Current line */ 65 int col; /* Current column */ 66 /* 67 * NOTE: consumed is only tested for equality in the parser code, 68 * so even if there is an overflow this should not give troubles 69 * for parsing very large instances. 70 */ 71 unsigned long consumed; /* How many xmlChars already consumed */ 72 xmlParserInputDeallocate free; /* function to deallocate the base */ 73 const xmlChar *encoding; /* the encoding string for entity */ 74 const xmlChar *version; /* the version string for entity */ 75 int standalone; /* Was that entity marked standalone */ 76 int id; /* an unique identifier for the entity */ 77 }; 78 79 /** 80 * xmlParserNodeInfo: 81 * 82 * The parser can be asked to collect Node informations, i.e. at what 83 * place in the file they were detected. 84 * NOTE: This is off by default and not very well tested. 85 */ 86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 88 89 struct _xmlParserNodeInfo { 90 const struct _xmlNode* node; 91 /* Position & line # that text that created the node begins & ends on */ 92 unsigned long begin_pos; 93 unsigned long begin_line; 94 unsigned long end_pos; 95 unsigned long end_line; 96 }; 97 98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 100 struct _xmlParserNodeInfoSeq { 101 unsigned long maximum; 102 unsigned long length; 103 xmlParserNodeInfo* buffer; 104 }; 105 106 /** 107 * xmlParserInputState: 108 * 109 * The parser is now working also as a state based parser. 110 * The recursive one use the state info for entities processing. 111 */ 112 typedef enum { 113 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 114 XML_PARSER_START = 0, /* nothing has been parsed */ 115 XML_PARSER_MISC, /* Misc* before int subset */ 116 XML_PARSER_PI, /* Within a processing instruction */ 117 XML_PARSER_DTD, /* within some DTD content */ 118 XML_PARSER_PROLOG, /* Misc* after internal subset */ 119 XML_PARSER_COMMENT, /* within a comment */ 120 XML_PARSER_START_TAG, /* within a start tag */ 121 XML_PARSER_CONTENT, /* within the content */ 122 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 123 XML_PARSER_END_TAG, /* within a closing tag */ 124 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 125 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 126 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 127 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 128 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 129 XML_PARSER_IGNORE, /* within an IGNORED section */ 130 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 131 } xmlParserInputState; 132 133 /** 134 * XML_DETECT_IDS: 135 * 136 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 137 * Use it to initialize xmlLoadExtDtdDefaultValue. 138 */ 139 #define XML_DETECT_IDS 2 140 141 /** 142 * XML_COMPLETE_ATTRS: 143 * 144 * Bit in the loadsubset context field to tell to do complete the 145 * elements attributes lists with the ones defaulted from the DTDs. 146 * Use it to initialize xmlLoadExtDtdDefaultValue. 147 */ 148 #define XML_COMPLETE_ATTRS 4 149 150 /** 151 * XML_SKIP_IDS: 152 * 153 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 154 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 155 */ 156 #define XML_SKIP_IDS 8 157 158 /** 159 * xmlParserMode: 160 * 161 * A parser can operate in various modes 162 */ 163 typedef enum { 164 XML_PARSE_UNKNOWN = 0, 165 XML_PARSE_DOM = 1, 166 XML_PARSE_SAX = 2, 167 XML_PARSE_PUSH_DOM = 3, 168 XML_PARSE_PUSH_SAX = 4, 169 XML_PARSE_READER = 5 170 } xmlParserMode; 171 172 /** 173 * xmlParserCtxt: 174 * 175 * The parser context. 176 * NOTE This doesn't completely define the parser state, the (current ?) 177 * design of the parser uses recursive function calls since this allow 178 * and easy mapping from the production rules of the specification 179 * to the actual code. The drawback is that the actual function call 180 * also reflect the parser state. However most of the parsing routines 181 * takes as the only argument the parser context pointer, so migrating 182 * to a state based parser for progressive parsing shouldn't be too hard. 183 */ 184 struct _xmlParserCtxt { 185 struct _xmlSAXHandler *sax; /* The SAX handler */ 186 void *userData; /* For SAX interface only, used by DOM build */ 187 xmlDocPtr myDoc; /* the document being built */ 188 int wellFormed; /* is the document well formed */ 189 int replaceEntities; /* shall we replace entities ? */ 190 const xmlChar *version; /* the XML version string */ 191 const xmlChar *encoding; /* the declared encoding, if any */ 192 int standalone; /* standalone document */ 193 int html; /* an HTML(1)/Docbook(2) document */ 194 195 /* Input stream stack */ 196 xmlParserInputPtr input; /* Current input stream */ 197 int inputNr; /* Number of current input streams */ 198 int inputMax; /* Max number of input streams */ 199 xmlParserInputPtr *inputTab; /* stack of inputs */ 200 201 /* Node analysis stack only used for DOM building */ 202 xmlNodePtr node; /* Current parsed Node */ 203 int nodeNr; /* Depth of the parsing stack */ 204 int nodeMax; /* Max depth of the parsing stack */ 205 xmlNodePtr *nodeTab; /* array of nodes */ 206 207 int record_info; /* Whether node info should be kept */ 208 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 209 210 int errNo; /* error code */ 211 212 int hasExternalSubset; /* reference and external subset */ 213 int hasPErefs; /* the internal subset has PE refs */ 214 int external; /* are we parsing an external entity */ 215 216 int valid; /* is the document valid */ 217 int validate; /* shall we try to validate ? */ 218 xmlValidCtxt vctxt; /* The validity context */ 219 220 xmlParserInputState instate; /* current type of input */ 221 int token; /* next char look-ahead */ 222 223 char *directory; /* the data directory */ 224 225 /* Node name stack */ 226 const xmlChar *name; /* Current parsed Node */ 227 int nameNr; /* Depth of the parsing stack */ 228 int nameMax; /* Max depth of the parsing stack */ 229 const xmlChar * *nameTab; /* array of nodes */ 230 231 long nbChars; /* number of xmlChar processed */ 232 long checkIndex; /* used by progressive parsing lookup */ 233 int keepBlanks; /* ugly but ... */ 234 int disableSAX; /* SAX callbacks are disabled */ 235 int inSubset; /* Parsing is in int 1/ext 2 subset */ 236 const xmlChar * intSubName; /* name of subset */ 237 xmlChar * extSubURI; /* URI of external subset */ 238 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 239 240 /* xml:space values */ 241 int * space; /* Should the parser preserve spaces */ 242 int spaceNr; /* Depth of the parsing stack */ 243 int spaceMax; /* Max depth of the parsing stack */ 244 int * spaceTab; /* array of space infos */ 245 246 int depth; /* to prevent entity substitution loops */ 247 xmlParserInputPtr entity; /* used to check entities boundaries */ 248 int charset; /* encoding of the in-memory content 249 actually an xmlCharEncoding */ 250 int nodelen; /* Those two fields are there to */ 251 int nodemem; /* Speed up large node parsing */ 252 int pedantic; /* signal pedantic warnings */ 253 void *_private; /* For user data, libxml won't touch it */ 254 255 int loadsubset; /* should the external subset be loaded */ 256 int linenumbers; /* set line number in element content */ 257 void *catalogs; /* document's own catalog */ 258 int recovery; /* run in recovery mode */ 259 int progressive; /* is this a progressive parsing */ 260 xmlDictPtr dict; /* dictionnary for the parser */ 261 const xmlChar * *atts; /* array for the attributes callbacks */ 262 int maxatts; /* the size of the array */ 263 int docdict; /* use strings from dict to build tree */ 264 265 /* 266 * pre-interned strings 267 */ 268 const xmlChar *str_xml; 269 const xmlChar *str_xmlns; 270 const xmlChar *str_xml_ns; 271 272 /* 273 * Everything below is used only by the new SAX mode 274 */ 275 int sax2; /* operating in the new SAX mode */ 276 int nsNr; /* the number of inherited namespaces */ 277 int nsMax; /* the size of the arrays */ 278 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 279 int *attallocs; /* which attribute were allocated */ 280 void * *pushTab; /* array of data for push */ 281 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 282 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 283 int nsWellFormed; /* is the document XML Nanespace okay */ 284 int options; /* Extra options */ 285 286 /* 287 * Those fields are needed only for treaming parsing so far 288 */ 289 int dictNames; /* Use dictionary names for the tree */ 290 int freeElemsNr; /* number of freed element nodes */ 291 xmlNodePtr freeElems; /* List of freed element nodes */ 292 int freeAttrsNr; /* number of freed attributes nodes */ 293 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 294 295 /* 296 * the complete error informations for the last error. 297 */ 298 xmlError lastError; 299 xmlParserMode parseMode; /* the parser mode */ 300 }; 301 302 /** 303 * xmlSAXLocator: 304 * 305 * A SAX Locator. 306 */ 307 struct _xmlSAXLocator { 308 const xmlChar *(*getPublicId)(void *ctx); 309 const xmlChar *(*getSystemId)(void *ctx); 310 int (*getLineNumber)(void *ctx); 311 int (*getColumnNumber)(void *ctx); 312 }; 313 314 /** 315 * xmlSAXHandler: 316 * 317 * A SAX handler is bunch of callbacks called by the parser when processing 318 * of the input generate data or structure informations. 319 */ 320 321 /** 322 * resolveEntitySAXFunc: 323 * @ctx: the user data (XML parser context) 324 * @publicId: The public ID of the entity 325 * @systemId: The system ID of the entity 326 * 327 * Callback: 328 * The entity loader, to control the loading of external entities, 329 * the application can either: 330 * - override this resolveEntity() callback in the SAX block 331 * - or better use the xmlSetExternalEntityLoader() function to 332 * set up it's own entity resolution routine 333 * 334 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 335 */ 336 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 337 const xmlChar *publicId, 338 const xmlChar *systemId); 339 /** 340 * internalSubsetSAXFunc: 341 * @ctx: the user data (XML parser context) 342 * @name: the root element name 343 * @ExternalID: the external ID 344 * @SystemID: the SYSTEM ID (e.g. filename or URL) 345 * 346 * Callback on internal subset declaration. 347 */ 348 typedef void (*internalSubsetSAXFunc) (void *ctx, 349 const xmlChar *name, 350 const xmlChar *ExternalID, 351 const xmlChar *SystemID); 352 /** 353 * externalSubsetSAXFunc: 354 * @ctx: the user data (XML parser context) 355 * @name: the root element name 356 * @ExternalID: the external ID 357 * @SystemID: the SYSTEM ID (e.g. filename or URL) 358 * 359 * Callback on external subset declaration. 360 */ 361 typedef void (*externalSubsetSAXFunc) (void *ctx, 362 const xmlChar *name, 363 const xmlChar *ExternalID, 364 const xmlChar *SystemID); 365 /** 366 * getEntitySAXFunc: 367 * @ctx: the user data (XML parser context) 368 * @name: The entity name 369 * 370 * Get an entity by name. 371 * 372 * Returns the xmlEntityPtr if found. 373 */ 374 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 375 const xmlChar *name); 376 /** 377 * getParameterEntitySAXFunc: 378 * @ctx: the user data (XML parser context) 379 * @name: The entity name 380 * 381 * Get a parameter entity by name. 382 * 383 * Returns the xmlEntityPtr if found. 384 */ 385 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 386 const xmlChar *name); 387 /** 388 * entityDeclSAXFunc: 389 * @ctx: the user data (XML parser context) 390 * @name: the entity name 391 * @type: the entity type 392 * @publicId: The public ID of the entity 393 * @systemId: The system ID of the entity 394 * @content: the entity value (without processing). 395 * 396 * An entity definition has been parsed. 397 */ 398 typedef void (*entityDeclSAXFunc) (void *ctx, 399 const xmlChar *name, 400 int type, 401 const xmlChar *publicId, 402 const xmlChar *systemId, 403 xmlChar *content); 404 /** 405 * notationDeclSAXFunc: 406 * @ctx: the user data (XML parser context) 407 * @name: The name of the notation 408 * @publicId: The public ID of the entity 409 * @systemId: The system ID of the entity 410 * 411 * What to do when a notation declaration has been parsed. 412 */ 413 typedef void (*notationDeclSAXFunc)(void *ctx, 414 const xmlChar *name, 415 const xmlChar *publicId, 416 const xmlChar *systemId); 417 /** 418 * attributeDeclSAXFunc: 419 * @ctx: the user data (XML parser context) 420 * @elem: the name of the element 421 * @fullname: the attribute name 422 * @type: the attribute type 423 * @def: the type of default value 424 * @defaultValue: the attribute default value 425 * @tree: the tree of enumerated value set 426 * 427 * An attribute definition has been parsed. 428 */ 429 typedef void (*attributeDeclSAXFunc)(void *ctx, 430 const xmlChar *elem, 431 const xmlChar *fullname, 432 int type, 433 int def, 434 const xmlChar *defaultValue, 435 xmlEnumerationPtr tree); 436 /** 437 * elementDeclSAXFunc: 438 * @ctx: the user data (XML parser context) 439 * @name: the element name 440 * @type: the element type 441 * @content: the element value tree 442 * 443 * An element definition has been parsed. 444 */ 445 typedef void (*elementDeclSAXFunc)(void *ctx, 446 const xmlChar *name, 447 int type, 448 xmlElementContentPtr content); 449 /** 450 * unparsedEntityDeclSAXFunc: 451 * @ctx: the user data (XML parser context) 452 * @name: The name of the entity 453 * @publicId: The public ID of the entity 454 * @systemId: The system ID of the entity 455 * @notationName: the name of the notation 456 * 457 * What to do when an unparsed entity declaration is parsed. 458 */ 459 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 460 const xmlChar *name, 461 const xmlChar *publicId, 462 const xmlChar *systemId, 463 const xmlChar *notationName); 464 /** 465 * setDocumentLocatorSAXFunc: 466 * @ctx: the user data (XML parser context) 467 * @loc: A SAX Locator 468 * 469 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 470 * Everything is available on the context, so this is useless in our case. 471 */ 472 typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 473 xmlSAXLocatorPtr loc); 474 /** 475 * startDocumentSAXFunc: 476 * @ctx: the user data (XML parser context) 477 * 478 * Called when the document start being processed. 479 */ 480 typedef void (*startDocumentSAXFunc) (void *ctx); 481 /** 482 * endDocumentSAXFunc: 483 * @ctx: the user data (XML parser context) 484 * 485 * Called when the document end has been detected. 486 */ 487 typedef void (*endDocumentSAXFunc) (void *ctx); 488 /** 489 * startElementSAXFunc: 490 * @ctx: the user data (XML parser context) 491 * @name: The element name, including namespace prefix 492 * @atts: An array of name/value attributes pairs, NULL terminated 493 * 494 * Called when an opening tag has been processed. 495 */ 496 typedef void (*startElementSAXFunc) (void *ctx, 497 const xmlChar *name, 498 const xmlChar **atts); 499 /** 500 * endElementSAXFunc: 501 * @ctx: the user data (XML parser context) 502 * @name: The element name 503 * 504 * Called when the end of an element has been detected. 505 */ 506 typedef void (*endElementSAXFunc) (void *ctx, 507 const xmlChar *name); 508 /** 509 * attributeSAXFunc: 510 * @ctx: the user data (XML parser context) 511 * @name: The attribute name, including namespace prefix 512 * @value: The attribute value 513 * 514 * Handle an attribute that has been read by the parser. 515 * The default handling is to convert the attribute into an 516 * DOM subtree and past it in a new xmlAttr element added to 517 * the element. 518 */ 519 typedef void (*attributeSAXFunc) (void *ctx, 520 const xmlChar *name, 521 const xmlChar *value); 522 /** 523 * referenceSAXFunc: 524 * @ctx: the user data (XML parser context) 525 * @name: The entity name 526 * 527 * Called when an entity reference is detected. 528 */ 529 typedef void (*referenceSAXFunc) (void *ctx, 530 const xmlChar *name); 531 /** 532 * charactersSAXFunc: 533 * @ctx: the user data (XML parser context) 534 * @ch: a xmlChar string 535 * @len: the number of xmlChar 536 * 537 * Receiving some chars from the parser. 538 */ 539 typedef void (*charactersSAXFunc) (void *ctx, 540 const xmlChar *ch, 541 int len); 542 /** 543 * ignorableWhitespaceSAXFunc: 544 * @ctx: the user data (XML parser context) 545 * @ch: a xmlChar string 546 * @len: the number of xmlChar 547 * 548 * Receiving some ignorable whitespaces from the parser. 549 * UNUSED: by default the DOM building will use characters. 550 */ 551 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 552 const xmlChar *ch, 553 int len); 554 /** 555 * processingInstructionSAXFunc: 556 * @ctx: the user data (XML parser context) 557 * @target: the target name 558 * @data: the PI data's 559 * 560 * A processing instruction has been parsed. 561 */ 562 typedef void (*processingInstructionSAXFunc) (void *ctx, 563 const xmlChar *target, 564 const xmlChar *data); 565 /** 566 * commentSAXFunc: 567 * @ctx: the user data (XML parser context) 568 * @value: the comment content 569 * 570 * A comment has been parsed. 571 */ 572 typedef void (*commentSAXFunc) (void *ctx, 573 const xmlChar *value); 574 /** 575 * cdataBlockSAXFunc: 576 * @ctx: the user data (XML parser context) 577 * @value: The pcdata content 578 * @len: the block length 579 * 580 * Called when a pcdata block has been parsed. 581 */ 582 typedef void (*cdataBlockSAXFunc) ( 583 void *ctx, 584 const xmlChar *value, 585 int len); 586 /** 587 * warningSAXFunc: 588 * @ctx: an XML parser context 589 * @msg: the message to display/transmit 590 * @...: extra parameters for the message display 591 * 592 * Display and format a warning messages, callback. 593 */ 594 typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 595 const char *msg, ...); 596 /** 597 * errorSAXFunc: 598 * @ctx: an XML parser context 599 * @msg: the message to display/transmit 600 * @...: extra parameters for the message display 601 * 602 * Display and format an error messages, callback. 603 */ 604 typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 605 const char *msg, ...); 606 /** 607 * fatalErrorSAXFunc: 608 * @ctx: an XML parser context 609 * @msg: the message to display/transmit 610 * @...: extra parameters for the message display 611 * 612 * Display and format fatal error messages, callback. 613 * Note: so far fatalError() SAX callbacks are not used, error() 614 * get all the callbacks for errors. 615 */ 616 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 617 const char *msg, ...); 618 /** 619 * isStandaloneSAXFunc: 620 * @ctx: the user data (XML parser context) 621 * 622 * Is this document tagged standalone? 623 * 624 * Returns 1 if true 625 */ 626 typedef int (*isStandaloneSAXFunc) (void *ctx); 627 /** 628 * hasInternalSubsetSAXFunc: 629 * @ctx: the user data (XML parser context) 630 * 631 * Does this document has an internal subset. 632 * 633 * Returns 1 if true 634 */ 635 typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 636 637 /** 638 * hasExternalSubsetSAXFunc: 639 * @ctx: the user data (XML parser context) 640 * 641 * Does this document has an external subset? 642 * 643 * Returns 1 if true 644 */ 645 typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 646 647 /************************************************************************ 648 * * 649 * The SAX version 2 API extensions * 650 * * 651 ************************************************************************/ 652 /** 653 * XML_SAX2_MAGIC: 654 * 655 * Special constant found in SAX2 blocks initialized fields 656 */ 657 #define XML_SAX2_MAGIC 0xDEEDBEAF 658 659 /** 660 * startElementNsSAX2Func: 661 * @ctx: the user data (XML parser context) 662 * @localname: the local name of the element 663 * @prefix: the element namespace prefix if available 664 * @URI: the element namespace name if available 665 * @nb_namespaces: number of namespace definitions on that node 666 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 667 * @nb_attributes: the number of attributes on that node 668 * @nb_defaulted: the number of defaulted attributes. The defaulted 669 * ones are at the end of the array 670 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 671 * attribute values. 672 * 673 * SAX2 callback when an element start has been detected by the parser. 674 * It provides the namespace informations for the element, as well as 675 * the new namespace declarations on the element. 676 */ 677 678 typedef void (*startElementNsSAX2Func) (void *ctx, 679 const xmlChar *localname, 680 const xmlChar *prefix, 681 const xmlChar *URI, 682 int nb_namespaces, 683 const xmlChar **namespaces, 684 int nb_attributes, 685 int nb_defaulted, 686 const xmlChar **attributes); 687 688 /** 689 * endElementNsSAX2Func: 690 * @ctx: the user data (XML parser context) 691 * @localname: the local name of the element 692 * @prefix: the element namespace prefix if available 693 * @URI: the element namespace name if available 694 * 695 * SAX2 callback when an element end has been detected by the parser. 696 * It provides the namespace informations for the element. 697 */ 698 699 typedef void (*endElementNsSAX2Func) (void *ctx, 700 const xmlChar *localname, 701 const xmlChar *prefix, 702 const xmlChar *URI); 703 704 705 struct _xmlSAXHandler { 706 internalSubsetSAXFunc internalSubset; 707 isStandaloneSAXFunc isStandalone; 708 hasInternalSubsetSAXFunc hasInternalSubset; 709 hasExternalSubsetSAXFunc hasExternalSubset; 710 resolveEntitySAXFunc resolveEntity; 711 getEntitySAXFunc getEntity; 712 entityDeclSAXFunc entityDecl; 713 notationDeclSAXFunc notationDecl; 714 attributeDeclSAXFunc attributeDecl; 715 elementDeclSAXFunc elementDecl; 716 unparsedEntityDeclSAXFunc unparsedEntityDecl; 717 setDocumentLocatorSAXFunc setDocumentLocator; 718 startDocumentSAXFunc startDocument; 719 endDocumentSAXFunc endDocument; 720 startElementSAXFunc startElement; 721 endElementSAXFunc endElement; 722 referenceSAXFunc reference; 723 charactersSAXFunc characters; 724 ignorableWhitespaceSAXFunc ignorableWhitespace; 725 processingInstructionSAXFunc processingInstruction; 726 commentSAXFunc comment; 727 warningSAXFunc warning; 728 errorSAXFunc error; 729 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 730 getParameterEntitySAXFunc getParameterEntity; 731 cdataBlockSAXFunc cdataBlock; 732 externalSubsetSAXFunc externalSubset; 733 unsigned int initialized; 734 /* The following fields are extensions available only on version 2 */ 735 void *_private; 736 startElementNsSAX2Func startElementNs; 737 endElementNsSAX2Func endElementNs; 738 xmlStructuredErrorFunc serror; 739 }; 740 741 /* 742 * SAX Version 1 743 */ 744 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 745 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 746 struct _xmlSAXHandlerV1 { 747 internalSubsetSAXFunc internalSubset; 748 isStandaloneSAXFunc isStandalone; 749 hasInternalSubsetSAXFunc hasInternalSubset; 750 hasExternalSubsetSAXFunc hasExternalSubset; 751 resolveEntitySAXFunc resolveEntity; 752 getEntitySAXFunc getEntity; 753 entityDeclSAXFunc entityDecl; 754 notationDeclSAXFunc notationDecl; 755 attributeDeclSAXFunc attributeDecl; 756 elementDeclSAXFunc elementDecl; 757 unparsedEntityDeclSAXFunc unparsedEntityDecl; 758 setDocumentLocatorSAXFunc setDocumentLocator; 759 startDocumentSAXFunc startDocument; 760 endDocumentSAXFunc endDocument; 761 startElementSAXFunc startElement; 762 endElementSAXFunc endElement; 763 referenceSAXFunc reference; 764 charactersSAXFunc characters; 765 ignorableWhitespaceSAXFunc ignorableWhitespace; 766 processingInstructionSAXFunc processingInstruction; 767 commentSAXFunc comment; 768 warningSAXFunc warning; 769 errorSAXFunc error; 770 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 771 getParameterEntitySAXFunc getParameterEntity; 772 cdataBlockSAXFunc cdataBlock; 773 externalSubsetSAXFunc externalSubset; 774 unsigned int initialized; 775 }; 776 777 778 /** 779 * xmlExternalEntityLoader: 780 * @URL: The System ID of the resource requested 781 * @ID: The Public ID of the resource requested 782 * @context: the XML parser context 783 * 784 * External entity loaders types. 785 * 786 * Returns the entity input parser. 787 */ 788 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 789 const char *ID, 790 xmlParserCtxtPtr context); 791 792 #ifdef __cplusplus 793 } 794 #endif 795 796 #include <libxml/encoding.h> 797 #include <libxml/xmlIO.h> 798 #include <libxml/globals.h> 799 800 #ifdef __cplusplus 801 extern "C" { 802 #endif 803 804 805 /* 806 * Init/Cleanup 807 */ 808 XMLPUBFUN void XMLCALL 809 xmlInitParser (void); 810 XMLPUBFUN void XMLCALL 811 xmlCleanupParser (void); 812 813 /* 814 * Input functions 815 */ 816 XMLPUBFUN int XMLCALL 817 xmlParserInputRead (xmlParserInputPtr in, 818 int len); 819 XMLPUBFUN int XMLCALL 820 xmlParserInputGrow (xmlParserInputPtr in, 821 int len); 822 823 /* 824 * Basic parsing Interfaces 825 */ 826 #ifdef LIBXML_SAX1_ENABLED 827 XMLPUBFUN xmlDocPtr XMLCALL 828 xmlParseDoc (const xmlChar *cur); 829 XMLPUBFUN xmlDocPtr XMLCALL 830 xmlParseFile (const char *filename); 831 XMLPUBFUN xmlDocPtr XMLCALL 832 xmlParseMemory (const char *buffer, 833 int size); 834 #endif /* LIBXML_SAX1_ENABLED */ 835 XMLPUBFUN int XMLCALL 836 xmlSubstituteEntitiesDefault(int val); 837 XMLPUBFUN int XMLCALL 838 xmlKeepBlanksDefault (int val); 839 XMLPUBFUN void XMLCALL 840 xmlStopParser (xmlParserCtxtPtr ctxt); 841 XMLPUBFUN int XMLCALL 842 xmlPedanticParserDefault(int val); 843 XMLPUBFUN int XMLCALL 844 xmlLineNumbersDefault (int val); 845 846 #ifdef LIBXML_SAX1_ENABLED 847 /* 848 * Recovery mode 849 */ 850 XMLPUBFUN xmlDocPtr XMLCALL 851 xmlRecoverDoc (xmlChar *cur); 852 XMLPUBFUN xmlDocPtr XMLCALL 853 xmlRecoverMemory (const char *buffer, 854 int size); 855 XMLPUBFUN xmlDocPtr XMLCALL 856 xmlRecoverFile (const char *filename); 857 #endif /* LIBXML_SAX1_ENABLED */ 858 859 /* 860 * Less common routines and SAX interfaces 861 */ 862 XMLPUBFUN int XMLCALL 863 xmlParseDocument (xmlParserCtxtPtr ctxt); 864 XMLPUBFUN int XMLCALL 865 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 866 #ifdef LIBXML_SAX1_ENABLED 867 XMLPUBFUN int XMLCALL 868 xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 869 void *user_data, 870 const char *filename); 871 XMLPUBFUN int XMLCALL 872 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 873 void *user_data, 874 const char *buffer, 875 int size); 876 XMLPUBFUN xmlDocPtr XMLCALL 877 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 878 const xmlChar *cur, 879 int recovery); 880 XMLPUBFUN xmlDocPtr XMLCALL 881 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 882 const char *buffer, 883 int size, 884 int recovery); 885 XMLPUBFUN xmlDocPtr XMLCALL 886 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 887 const char *buffer, 888 int size, 889 int recovery, 890 void *data); 891 XMLPUBFUN xmlDocPtr XMLCALL 892 xmlSAXParseFile (xmlSAXHandlerPtr sax, 893 const char *filename, 894 int recovery); 895 XMLPUBFUN xmlDocPtr XMLCALL 896 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 897 const char *filename, 898 int recovery, 899 void *data); 900 XMLPUBFUN xmlDocPtr XMLCALL 901 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 902 const char *filename); 903 XMLPUBFUN xmlDocPtr XMLCALL 904 xmlParseEntity (const char *filename); 905 #endif /* LIBXML_SAX1_ENABLED */ 906 907 #ifdef LIBXML_VALID_ENABLED 908 XMLPUBFUN xmlDtdPtr XMLCALL 909 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 910 const xmlChar *ExternalID, 911 const xmlChar *SystemID); 912 XMLPUBFUN xmlDtdPtr XMLCALL 913 xmlParseDTD (const xmlChar *ExternalID, 914 const xmlChar *SystemID); 915 XMLPUBFUN xmlDtdPtr XMLCALL 916 xmlIOParseDTD (xmlSAXHandlerPtr sax, 917 xmlParserInputBufferPtr input, 918 xmlCharEncoding enc); 919 #endif /* LIBXML_VALID_ENABLE */ 920 #ifdef LIBXML_SAX1_ENABLED 921 XMLPUBFUN int XMLCALL 922 xmlParseBalancedChunkMemory(xmlDocPtr doc, 923 xmlSAXHandlerPtr sax, 924 void *user_data, 925 int depth, 926 const xmlChar *string, 927 xmlNodePtr *lst); 928 #endif /* LIBXML_SAX1_ENABLED */ 929 XMLPUBFUN xmlParserErrors XMLCALL 930 xmlParseInNodeContext (xmlNodePtr node, 931 const char *data, 932 int datalen, 933 int options, 934 xmlNodePtr *lst); 935 #ifdef LIBXML_SAX1_ENABLED 936 XMLPUBFUN int XMLCALL 937 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 938 xmlSAXHandlerPtr sax, 939 void *user_data, 940 int depth, 941 const xmlChar *string, 942 xmlNodePtr *lst, 943 int recover); 944 XMLPUBFUN int XMLCALL 945 xmlParseExternalEntity (xmlDocPtr doc, 946 xmlSAXHandlerPtr sax, 947 void *user_data, 948 int depth, 949 const xmlChar *URL, 950 const xmlChar *ID, 951 xmlNodePtr *lst); 952 #endif /* LIBXML_SAX1_ENABLED */ 953 XMLPUBFUN int XMLCALL 954 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 955 const xmlChar *URL, 956 const xmlChar *ID, 957 xmlNodePtr *lst); 958 959 /* 960 * Parser contexts handling. 961 */ 962 XMLPUBFUN xmlParserCtxtPtr XMLCALL 963 xmlNewParserCtxt (void); 964 XMLPUBFUN int XMLCALL 965 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 966 XMLPUBFUN void XMLCALL 967 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 968 XMLPUBFUN void XMLCALL 969 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 970 #ifdef LIBXML_SAX1_ENABLED 971 XMLPUBFUN void XMLCALL 972 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 973 const xmlChar* buffer, 974 const char *filename); 975 #endif /* LIBXML_SAX1_ENABLED */ 976 XMLPUBFUN xmlParserCtxtPtr XMLCALL 977 xmlCreateDocParserCtxt (const xmlChar *cur); 978 979 #ifdef LIBXML_LEGACY_ENABLED 980 /* 981 * Reading/setting optional parsing features. 982 */ 983 XMLPUBFUN int XMLCALL 984 xmlGetFeaturesList (int *len, 985 const char **result); 986 XMLPUBFUN int XMLCALL 987 xmlGetFeature (xmlParserCtxtPtr ctxt, 988 const char *name, 989 void *result); 990 XMLPUBFUN int XMLCALL 991 xmlSetFeature (xmlParserCtxtPtr ctxt, 992 const char *name, 993 void *value); 994 #endif /* LIBXML_LEGACY_ENABLED */ 995 996 #ifdef LIBXML_PUSH_ENABLED 997 /* 998 * Interfaces for the Push mode. 999 */ 1000 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1001 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1002 void *user_data, 1003 const char *chunk, 1004 int size, 1005 const char *filename); 1006 XMLPUBFUN int XMLCALL 1007 xmlParseChunk (xmlParserCtxtPtr ctxt, 1008 const char *chunk, 1009 int size, 1010 int terminate); 1011 #endif /* LIBXML_PUSH_ENABLED */ 1012 1013 /* 1014 * Special I/O mode. 1015 */ 1016 1017 XMLPUBFUN xmlParserCtxtPtr XMLCALL 1018 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1019 void *user_data, 1020 xmlInputReadCallback ioread, 1021 xmlInputCloseCallback ioclose, 1022 void *ioctx, 1023 xmlCharEncoding enc); 1024 1025 XMLPUBFUN xmlParserInputPtr XMLCALL 1026 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1027 xmlParserInputBufferPtr input, 1028 xmlCharEncoding enc); 1029 1030 /* 1031 * Node infos. 1032 */ 1033 XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1034 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1035 const xmlNodePtr node); 1036 XMLPUBFUN void XMLCALL 1037 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1038 XMLPUBFUN void XMLCALL 1039 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1040 XMLPUBFUN unsigned long XMLCALL 1041 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1042 const xmlNodePtr node); 1043 XMLPUBFUN void XMLCALL 1044 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1045 const xmlParserNodeInfoPtr info); 1046 1047 /* 1048 * External entities handling actually implemented in xmlIO. 1049 */ 1050 1051 XMLPUBFUN void XMLCALL 1052 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1053 XMLPUBFUN xmlExternalEntityLoader XMLCALL 1054 xmlGetExternalEntityLoader(void); 1055 XMLPUBFUN xmlParserInputPtr XMLCALL 1056 xmlLoadExternalEntity (const char *URL, 1057 const char *ID, 1058 xmlParserCtxtPtr ctxt); 1059 1060 /* 1061 * Index lookup, actually implemented in the encoding module 1062 */ 1063 XMLPUBFUN long XMLCALL 1064 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1065 1066 /* 1067 * New set of simpler/more flexible APIs 1068 */ 1069 /** 1070 * xmlParserOption: 1071 * 1072 * This is the set of XML parser options that can be passed down 1073 * to the xmlReadDoc() and similar calls. 1074 */ 1075 typedef enum { 1076 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1077 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1078 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1079 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1080 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1081 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1082 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1083 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1084 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1085 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1086 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */ 1087 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1088 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */ 1089 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ 1090 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1091 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1092 XML_PARSE_COMPACT = 1<<16 /* compact small text nodes; no modification of 1093 the tree allowed afterwards (will possibly 1094 crash if you try to modify the tree) */ 1095 } xmlParserOption; 1096 1097 XMLPUBFUN void XMLCALL 1098 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1099 XMLPUBFUN int XMLCALL 1100 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1101 const char *chunk, 1102 int size, 1103 const char *filename, 1104 const char *encoding); 1105 XMLPUBFUN int XMLCALL 1106 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1107 int options); 1108 XMLPUBFUN xmlDocPtr XMLCALL 1109 xmlReadDoc (const xmlChar *cur, 1110 const char *URL, 1111 const char *encoding, 1112 int options); 1113 XMLPUBFUN xmlDocPtr XMLCALL 1114 xmlReadFile (const char *URL, 1115 const char *encoding, 1116 int options); 1117 XMLPUBFUN xmlDocPtr XMLCALL 1118 xmlReadMemory (const char *buffer, 1119 int size, 1120 const char *URL, 1121 const char *encoding, 1122 int options); 1123 XMLPUBFUN xmlDocPtr XMLCALL 1124 xmlReadFd (int fd, 1125 const char *URL, 1126 const char *encoding, 1127 int options); 1128 XMLPUBFUN xmlDocPtr XMLCALL 1129 xmlReadIO (xmlInputReadCallback ioread, 1130 xmlInputCloseCallback ioclose, 1131 void *ioctx, 1132 const char *URL, 1133 const char *encoding, 1134 int options); 1135 XMLPUBFUN xmlDocPtr XMLCALL 1136 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1137 const xmlChar *cur, 1138 const char *URL, 1139 const char *encoding, 1140 int options); 1141 XMLPUBFUN xmlDocPtr XMLCALL 1142 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1143 const char *filename, 1144 const char *encoding, 1145 int options); 1146 XMLPUBFUN xmlDocPtr XMLCALL 1147 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1148 const char *buffer, 1149 int size, 1150 const char *URL, 1151 const char *encoding, 1152 int options); 1153 XMLPUBFUN xmlDocPtr XMLCALL 1154 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1155 int fd, 1156 const char *URL, 1157 const char *encoding, 1158 int options); 1159 XMLPUBFUN xmlDocPtr XMLCALL 1160 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1161 xmlInputReadCallback ioread, 1162 xmlInputCloseCallback ioclose, 1163 void *ioctx, 1164 const char *URL, 1165 const char *encoding, 1166 int options); 1167 1168 /* 1169 * Library wide options 1170 */ 1171 /** 1172 * xmlFeature: 1173 * 1174 * Used to examine the existance of features that can be enabled 1175 * or disabled at compile-time. 1176 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1177 */ 1178 typedef enum { 1179 XML_WITH_THREAD = 1, 1180 XML_WITH_TREE = 2, 1181 XML_WITH_OUTPUT = 3, 1182 XML_WITH_PUSH = 4, 1183 XML_WITH_READER = 5, 1184 XML_WITH_PATTERN = 6, 1185 XML_WITH_WRITER = 7, 1186 XML_WITH_SAX1 = 8, 1187 XML_WITH_FTP = 9, 1188 XML_WITH_HTTP = 10, 1189 XML_WITH_VALID = 11, 1190 XML_WITH_HTML = 12, 1191 XML_WITH_LEGACY = 13, 1192 XML_WITH_C14N = 14, 1193 XML_WITH_CATALOG = 15, 1194 XML_WITH_XPATH = 16, 1195 XML_WITH_XPTR = 17, 1196 XML_WITH_XINCLUDE = 18, 1197 XML_WITH_ICONV = 19, 1198 XML_WITH_ISO8859X = 20, 1199 XML_WITH_UNICODE = 21, 1200 XML_WITH_REGEXP = 22, 1201 XML_WITH_AUTOMATA = 23, 1202 XML_WITH_EXPR = 24, 1203 XML_WITH_SCHEMAS = 25, 1204 XML_WITH_SCHEMATRON = 26, 1205 XML_WITH_MODULES = 27, 1206 XML_WITH_DEBUG = 28, 1207 XML_WITH_DEBUG_MEM = 29, 1208 XML_WITH_DEBUG_RUN = 30, 1209 XML_WITH_ZLIB = 31, 1210 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1211 } xmlFeature; 1212 1213 XMLPUBFUN int XMLCALL 1214 xmlHasFeature (xmlFeature feature); 1215 1216 #ifdef __cplusplus 1217 } 1218 #endif 1219 #endif /* __XML_PARSER_H__ */ 1220 1221