1 /* 2 * Summary: internals routines exported by the parser. 3 * Description: this module exports a number of internal parsing routines 4 * they are not really all intended for applications but 5 * can prove useful doing low level processing. 6 * 7 * Copy: See Copyright for the status of this software. 8 * 9 * Author: Daniel Veillard 10 */ 11 12 #ifndef __XML_PARSER_INTERNALS_H__ 13 #define __XML_PARSER_INTERNALS_H__ 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/parser.h> 17 #include <libxml/HTMLparser.h> 18 #include <libxml/chvalid.h> 19 20 #ifdef __cplusplus 21 extern "C" { 22 #endif 23 24 /** 25 * xmlParserMaxDepth: 26 * 27 * arbitrary depth limit for the XML documents that we allow to 28 * process. This is not a limitation of the parser but a safety 29 * boundary feature. 30 */ 31 XMLPUBVAR unsigned int xmlParserMaxDepth; 32 33 /** 34 * XML_MAX_NAMELEN: 35 * 36 * Identifiers can be longer, but this will be more costly 37 * at runtime. 38 */ 39 #define XML_MAX_NAMELEN 100 40 41 /** 42 * INPUT_CHUNK: 43 * 44 * The parser tries to always have that amount of input ready. 45 * One of the point is providing context when reporting errors. 46 */ 47 #define INPUT_CHUNK 250 48 49 /************************************************************************ 50 * * 51 * UNICODE version of the macros. * 52 * * 53 ************************************************************************/ 54 /** 55 * IS_BYTE_CHAR: 56 * @c: an byte value (int) 57 * 58 * Macro to check the following production in the XML spec: 59 * 60 * [2] Char ::= #x9 | #xA | #xD | [#x20...] 61 * any byte character in the accepted range 62 */ 63 #define IS_BYTE_CHAR(c) xmlIsChar_ch(c) 64 65 /** 66 * IS_CHAR: 67 * @c: an UNICODE value (int) 68 * 69 * Macro to check the following production in the XML spec: 70 * 71 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] 72 * | [#x10000-#x10FFFF] 73 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. 74 */ 75 #define IS_CHAR(c) xmlIsCharQ(c) 76 77 /** 78 * IS_CHAR_CH: 79 * @c: an xmlChar (usually an unsigned char) 80 * 81 * Behaves like IS_CHAR on single-byte value 82 */ 83 #define IS_CHAR_CH(c) xmlIsChar_ch(c) 84 85 /** 86 * IS_BLANK: 87 * @c: an UNICODE value (int) 88 * 89 * Macro to check the following production in the XML spec: 90 * 91 * [3] S ::= (#x20 | #x9 | #xD | #xA)+ 92 */ 93 #define IS_BLANK(c) xmlIsBlankQ(c) 94 95 /** 96 * IS_BLANK_CH: 97 * @c: an xmlChar value (normally unsigned char) 98 * 99 * Behaviour same as IS_BLANK 100 */ 101 #define IS_BLANK_CH(c) xmlIsBlank_ch(c) 102 103 /** 104 * IS_BASECHAR: 105 * @c: an UNICODE value (int) 106 * 107 * Macro to check the following production in the XML spec: 108 * 109 * [85] BaseChar ::= ... long list see REC ... 110 */ 111 #define IS_BASECHAR(c) xmlIsBaseCharQ(c) 112 113 /** 114 * IS_DIGIT: 115 * @c: an UNICODE value (int) 116 * 117 * Macro to check the following production in the XML spec: 118 * 119 * [88] Digit ::= ... long list see REC ... 120 */ 121 #define IS_DIGIT(c) xmlIsDigitQ(c) 122 123 /** 124 * IS_DIGIT_CH: 125 * @c: an xmlChar value (usually an unsigned char) 126 * 127 * Behaves like IS_DIGIT but with a single byte argument 128 */ 129 #define IS_DIGIT_CH(c) xmlIsDigit_ch(c) 130 131 /** 132 * IS_COMBINING: 133 * @c: an UNICODE value (int) 134 * 135 * Macro to check the following production in the XML spec: 136 * 137 * [87] CombiningChar ::= ... long list see REC ... 138 */ 139 #define IS_COMBINING(c) xmlIsCombiningQ(c) 140 141 /** 142 * IS_COMBINING_CH: 143 * @c: an xmlChar (usually an unsigned char) 144 * 145 * Always false (all combining chars > 0xff) 146 */ 147 #define IS_COMBINING_CH(c) 0 148 149 /** 150 * IS_EXTENDER: 151 * @c: an UNICODE value (int) 152 * 153 * Macro to check the following production in the XML spec: 154 * 155 * 156 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | 157 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | 158 * [#x309D-#x309E] | [#x30FC-#x30FE] 159 */ 160 #define IS_EXTENDER(c) xmlIsExtenderQ(c) 161 162 /** 163 * IS_EXTENDER_CH: 164 * @c: an xmlChar value (usually an unsigned char) 165 * 166 * Behaves like IS_EXTENDER but with a single-byte argument 167 */ 168 #define IS_EXTENDER_CH(c) xmlIsExtender_ch(c) 169 170 /** 171 * IS_IDEOGRAPHIC: 172 * @c: an UNICODE value (int) 173 * 174 * Macro to check the following production in the XML spec: 175 * 176 * 177 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] 178 */ 179 #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c) 180 181 /** 182 * IS_LETTER: 183 * @c: an UNICODE value (int) 184 * 185 * Macro to check the following production in the XML spec: 186 * 187 * 188 * [84] Letter ::= BaseChar | Ideographic 189 */ 190 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) 191 192 /** 193 * IS_LETTER_CH: 194 * @c: an xmlChar value (normally unsigned char) 195 * 196 * Macro behaves like IS_LETTER, but only check base chars 197 * 198 */ 199 #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) 200 201 /** 202 * IS_ASCII_LETTER: 203 * @c: an xmlChar value 204 * 205 * Macro to check [a-zA-Z] 206 * 207 */ 208 #define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ 209 ((0x61 <= (c)) && ((c) <= 0x7a))) 210 211 /** 212 * IS_ASCII_DIGIT: 213 * @c: an xmlChar value 214 * 215 * Macro to check [0-9] 216 * 217 */ 218 #define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) 219 220 /** 221 * IS_PUBIDCHAR: 222 * @c: an UNICODE value (int) 223 * 224 * Macro to check the following production in the XML spec: 225 * 226 * 227 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 228 */ 229 #define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c) 230 231 /** 232 * IS_PUBIDCHAR_CH: 233 * @c: an xmlChar value (normally unsigned char) 234 * 235 * Same as IS_PUBIDCHAR but for single-byte value 236 */ 237 #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c) 238 239 /** 240 * SKIP_EOL: 241 * @p: and UTF8 string pointer 242 * 243 * Skips the end of line chars. 244 */ 245 #define SKIP_EOL(p) \ 246 if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \ 247 if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; } 248 249 /** 250 * MOVETO_ENDTAG: 251 * @p: and UTF8 string pointer 252 * 253 * Skips to the next '>' char. 254 */ 255 #define MOVETO_ENDTAG(p) \ 256 while ((*p) && (*(p) != '>')) (p)++ 257 258 /** 259 * MOVETO_STARTTAG: 260 * @p: and UTF8 string pointer 261 * 262 * Skips to the next '<' char. 263 */ 264 #define MOVETO_STARTTAG(p) \ 265 while ((*p) && (*(p) != '<')) (p)++ 266 267 /** 268 * Global variables used for predefined strings. 269 */ 270 XMLPUBVAR const xmlChar xmlStringText[]; 271 XMLPUBVAR const xmlChar xmlStringTextNoenc[]; 272 XMLPUBVAR const xmlChar xmlStringComment[]; 273 274 /* 275 * Function to finish the work of the macros where needed. 276 */ 277 XMLPUBFUN int XMLCALL xmlIsLetter (int c); 278 279 /** 280 * Parser context. 281 */ 282 XMLPUBFUN xmlParserCtxtPtr XMLCALL 283 xmlCreateFileParserCtxt (const char *filename); 284 XMLPUBFUN xmlParserCtxtPtr XMLCALL 285 xmlCreateURLParserCtxt (const char *filename, 286 int options); 287 XMLPUBFUN xmlParserCtxtPtr XMLCALL 288 xmlCreateMemoryParserCtxt(const char *buffer, 289 int size); 290 XMLPUBFUN xmlParserCtxtPtr XMLCALL 291 xmlCreateEntityParserCtxt(const xmlChar *URL, 292 const xmlChar *ID, 293 const xmlChar *base); 294 XMLPUBFUN int XMLCALL 295 xmlSwitchEncoding (xmlParserCtxtPtr ctxt, 296 xmlCharEncoding enc); 297 XMLPUBFUN int XMLCALL 298 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, 299 xmlCharEncodingHandlerPtr handler); 300 XMLPUBFUN int XMLCALL 301 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt, 302 xmlParserInputPtr input, 303 xmlCharEncodingHandlerPtr handler); 304 305 #ifdef IN_LIBXML 306 /* internal error reporting */ 307 XMLPUBFUN void XMLCALL 308 __xmlErrEncoding (xmlParserCtxtPtr ctxt, 309 xmlParserErrors xmlerr, 310 const char *msg, 311 const xmlChar * str1, 312 const xmlChar * str2); 313 #endif 314 315 /** 316 * Input Streams. 317 */ 318 XMLPUBFUN xmlParserInputPtr XMLCALL 319 xmlNewStringInputStream (xmlParserCtxtPtr ctxt, 320 const xmlChar *buffer); 321 XMLPUBFUN xmlParserInputPtr XMLCALL 322 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, 323 xmlEntityPtr entity); 324 XMLPUBFUN void XMLCALL 325 xmlPushInput (xmlParserCtxtPtr ctxt, 326 xmlParserInputPtr input); 327 XMLPUBFUN xmlChar XMLCALL 328 xmlPopInput (xmlParserCtxtPtr ctxt); 329 XMLPUBFUN void XMLCALL 330 xmlFreeInputStream (xmlParserInputPtr input); 331 XMLPUBFUN xmlParserInputPtr XMLCALL 332 xmlNewInputFromFile (xmlParserCtxtPtr ctxt, 333 const char *filename); 334 XMLPUBFUN xmlParserInputPtr XMLCALL 335 xmlNewInputStream (xmlParserCtxtPtr ctxt); 336 337 /** 338 * Namespaces. 339 */ 340 XMLPUBFUN xmlChar * XMLCALL 341 xmlSplitQName (xmlParserCtxtPtr ctxt, 342 const xmlChar *name, 343 xmlChar **prefix); 344 345 /** 346 * Generic production rules. 347 */ 348 XMLPUBFUN const xmlChar * XMLCALL 349 xmlParseName (xmlParserCtxtPtr ctxt); 350 XMLPUBFUN xmlChar * XMLCALL 351 xmlParseNmtoken (xmlParserCtxtPtr ctxt); 352 XMLPUBFUN xmlChar * XMLCALL 353 xmlParseEntityValue (xmlParserCtxtPtr ctxt, 354 xmlChar **orig); 355 XMLPUBFUN xmlChar * XMLCALL 356 xmlParseAttValue (xmlParserCtxtPtr ctxt); 357 XMLPUBFUN xmlChar * XMLCALL 358 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); 359 XMLPUBFUN xmlChar * XMLCALL 360 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); 361 XMLPUBFUN void XMLCALL 362 xmlParseCharData (xmlParserCtxtPtr ctxt, 363 int cdata); 364 XMLPUBFUN xmlChar * XMLCALL 365 xmlParseExternalID (xmlParserCtxtPtr ctxt, 366 xmlChar **publicID, 367 int strict); 368 XMLPUBFUN void XMLCALL 369 xmlParseComment (xmlParserCtxtPtr ctxt); 370 XMLPUBFUN const xmlChar * XMLCALL 371 xmlParsePITarget (xmlParserCtxtPtr ctxt); 372 XMLPUBFUN void XMLCALL 373 xmlParsePI (xmlParserCtxtPtr ctxt); 374 XMLPUBFUN void XMLCALL 375 xmlParseNotationDecl (xmlParserCtxtPtr ctxt); 376 XMLPUBFUN void XMLCALL 377 xmlParseEntityDecl (xmlParserCtxtPtr ctxt); 378 XMLPUBFUN int XMLCALL 379 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, 380 xmlChar **value); 381 XMLPUBFUN xmlEnumerationPtr XMLCALL 382 xmlParseNotationType (xmlParserCtxtPtr ctxt); 383 XMLPUBFUN xmlEnumerationPtr XMLCALL 384 xmlParseEnumerationType (xmlParserCtxtPtr ctxt); 385 XMLPUBFUN int XMLCALL 386 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, 387 xmlEnumerationPtr *tree); 388 XMLPUBFUN int XMLCALL 389 xmlParseAttributeType (xmlParserCtxtPtr ctxt, 390 xmlEnumerationPtr *tree); 391 XMLPUBFUN void XMLCALL 392 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); 393 XMLPUBFUN xmlElementContentPtr XMLCALL 394 xmlParseElementMixedContentDecl 395 (xmlParserCtxtPtr ctxt, 396 int inputchk); 397 XMLPUBFUN xmlElementContentPtr XMLCALL 398 xmlParseElementChildrenContentDecl 399 (xmlParserCtxtPtr ctxt, 400 int inputchk); 401 XMLPUBFUN int XMLCALL 402 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, 403 const xmlChar *name, 404 xmlElementContentPtr *result); 405 XMLPUBFUN int XMLCALL 406 xmlParseElementDecl (xmlParserCtxtPtr ctxt); 407 XMLPUBFUN void XMLCALL 408 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); 409 XMLPUBFUN int XMLCALL 410 xmlParseCharRef (xmlParserCtxtPtr ctxt); 411 XMLPUBFUN xmlEntityPtr XMLCALL 412 xmlParseEntityRef (xmlParserCtxtPtr ctxt); 413 XMLPUBFUN void XMLCALL 414 xmlParseReference (xmlParserCtxtPtr ctxt); 415 XMLPUBFUN void XMLCALL 416 xmlParsePEReference (xmlParserCtxtPtr ctxt); 417 XMLPUBFUN void XMLCALL 418 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); 419 #ifdef LIBXML_SAX1_ENABLED 420 XMLPUBFUN const xmlChar * XMLCALL 421 xmlParseAttribute (xmlParserCtxtPtr ctxt, 422 xmlChar **value); 423 XMLPUBFUN const xmlChar * XMLCALL 424 xmlParseStartTag (xmlParserCtxtPtr ctxt); 425 XMLPUBFUN void XMLCALL 426 xmlParseEndTag (xmlParserCtxtPtr ctxt); 427 #endif /* LIBXML_SAX1_ENABLED */ 428 XMLPUBFUN void XMLCALL 429 xmlParseCDSect (xmlParserCtxtPtr ctxt); 430 XMLPUBFUN void XMLCALL 431 xmlParseContent (xmlParserCtxtPtr ctxt); 432 XMLPUBFUN void XMLCALL 433 xmlParseElement (xmlParserCtxtPtr ctxt); 434 XMLPUBFUN xmlChar * XMLCALL 435 xmlParseVersionNum (xmlParserCtxtPtr ctxt); 436 XMLPUBFUN xmlChar * XMLCALL 437 xmlParseVersionInfo (xmlParserCtxtPtr ctxt); 438 XMLPUBFUN xmlChar * XMLCALL 439 xmlParseEncName (xmlParserCtxtPtr ctxt); 440 XMLPUBFUN const xmlChar * XMLCALL 441 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); 442 XMLPUBFUN int XMLCALL 443 xmlParseSDDecl (xmlParserCtxtPtr ctxt); 444 XMLPUBFUN void XMLCALL 445 xmlParseXMLDecl (xmlParserCtxtPtr ctxt); 446 XMLPUBFUN void XMLCALL 447 xmlParseTextDecl (xmlParserCtxtPtr ctxt); 448 XMLPUBFUN void XMLCALL 449 xmlParseMisc (xmlParserCtxtPtr ctxt); 450 XMLPUBFUN void XMLCALL 451 xmlParseExternalSubset (xmlParserCtxtPtr ctxt, 452 const xmlChar *ExternalID, 453 const xmlChar *SystemID); 454 /** 455 * XML_SUBSTITUTE_NONE: 456 * 457 * If no entities need to be substituted. 458 */ 459 #define XML_SUBSTITUTE_NONE 0 460 /** 461 * XML_SUBSTITUTE_REF: 462 * 463 * Whether general entities need to be substituted. 464 */ 465 #define XML_SUBSTITUTE_REF 1 466 /** 467 * XML_SUBSTITUTE_PEREF: 468 * 469 * Whether parameter entities need to be substituted. 470 */ 471 #define XML_SUBSTITUTE_PEREF 2 472 /** 473 * XML_SUBSTITUTE_BOTH: 474 * 475 * Both general and parameter entities need to be substituted. 476 */ 477 #define XML_SUBSTITUTE_BOTH 3 478 479 XMLPUBFUN xmlChar * XMLCALL 480 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, 481 const xmlChar *str, 482 int what, 483 xmlChar end, 484 xmlChar end2, 485 xmlChar end3); 486 XMLPUBFUN xmlChar * XMLCALL 487 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt, 488 const xmlChar *str, 489 int len, 490 int what, 491 xmlChar end, 492 xmlChar end2, 493 xmlChar end3); 494 495 /* 496 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP. 497 */ 498 XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt, 499 xmlNodePtr value); 500 XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt); 501 XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt, 502 xmlParserInputPtr value); 503 XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt); 504 XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt); 505 XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt, 506 const xmlChar *value); 507 508 /* 509 * other commodities shared between parser.c and parserInternals. 510 */ 511 XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt); 512 XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt, 513 const xmlChar *cur, 514 int *len); 515 XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 516 XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang); 517 518 /* 519 * Really core function shared with HTML parser. 520 */ 521 XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt, 522 int *len); 523 XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out, 524 int val); 525 XMLPUBFUN int XMLCALL xmlCopyChar (int len, 526 xmlChar *out, 527 int val); 528 XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt); 529 XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in); 530 531 #ifdef LIBXML_HTML_ENABLED 532 /* 533 * Actually comes from the HTML parser but launched from the init stuff. 534 */ 535 XMLPUBFUN void XMLCALL htmlInitAutoClose (void); 536 XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename, 537 const char *encoding); 538 #endif 539 540 /* 541 * Specific function to keep track of entities references 542 * and used by the XSLT debugger. 543 */ 544 #ifdef LIBXML_LEGACY_ENABLED 545 /** 546 * xmlEntityReferenceFunc: 547 * @ent: the entity 548 * @firstNode: the fist node in the chunk 549 * @lastNode: the last nod in the chunk 550 * 551 * Callback function used when one needs to be able to track back the 552 * provenance of a chunk of nodes inherited from an entity replacement. 553 */ 554 typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent, 555 xmlNodePtr firstNode, 556 xmlNodePtr lastNode); 557 558 XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func); 559 560 XMLPUBFUN xmlChar * XMLCALL 561 xmlParseQuotedString (xmlParserCtxtPtr ctxt); 562 XMLPUBFUN void XMLCALL 563 xmlParseNamespace (xmlParserCtxtPtr ctxt); 564 XMLPUBFUN xmlChar * XMLCALL 565 xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); 566 XMLPUBFUN xmlChar * XMLCALL 567 xmlScanName (xmlParserCtxtPtr ctxt); 568 XMLPUBFUN xmlChar * XMLCALL 569 xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); 570 XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt); 571 XMLPUBFUN xmlChar * XMLCALL 572 xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, 573 xmlChar **prefix); 574 /** 575 * Entities 576 */ 577 XMLPUBFUN xmlChar * XMLCALL 578 xmlDecodeEntities (xmlParserCtxtPtr ctxt, 579 int len, 580 int what, 581 xmlChar end, 582 xmlChar end2, 583 xmlChar end3); 584 XMLPUBFUN void XMLCALL 585 xmlHandleEntity (xmlParserCtxtPtr ctxt, 586 xmlEntityPtr entity); 587 588 #endif /* LIBXML_LEGACY_ENABLED */ 589 590 #ifdef IN_LIBXML 591 /* 592 * internal only 593 */ 594 XMLPUBFUN void XMLCALL 595 xmlErrMemory (xmlParserCtxtPtr ctxt, 596 const char *extra); 597 #endif 598 599 #ifdef __cplusplus 600 } 601 #endif 602 #endif /* __XML_PARSER_INTERNALS_H__ */ 603