1 /*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * daniel@veillard.com
8 */
9
10 #define IN_LIBXML
11 #include "libxml.h"
12
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 #include <libxml/nanohttp.h>
39
40 #define CUR(ctxt) ctxt->input->cur
41 #define END(ctxt) ctxt->input->end
42
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46 #include "private/io.h"
47 #include "private/parser.h"
48
49 #define XML_MAX_ERRORS 100
50
51 /*
52 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53 * factor of serialized output after entity expansion.
54 */
55 #define XML_MAX_AMPLIFICATION_DEFAULT 5
56
57 /*
58 * Various global defaults for parsing
59 */
60
61 /**
62 * xmlCheckVersion:
63 * @version: the include version number
64 *
65 * check the compiled lib version against the include one.
66 */
67 void
xmlCheckVersion(int version)68 xmlCheckVersion(int version) {
69 int myversion = LIBXML_VERSION;
70
71 xmlInitParser();
72
73 if ((myversion / 10000) != (version / 10000)) {
74 fprintf(stderr,
75 "Fatal: program compiled against libxml %d using libxml %d\n",
76 (version / 10000), (myversion / 10000));
77 } else if ((myversion / 100) < (version / 100)) {
78 fprintf(stderr,
79 "Warning: program compiled against libxml %d using older %d\n",
80 (version / 100), (myversion / 100));
81 }
82 }
83
84
85 /************************************************************************
86 * *
87 * Some factorized error routines *
88 * *
89 ************************************************************************/
90
91
92 /**
93 * xmlCtxtSetErrorHandler:
94 * @ctxt: an XML parser context
95 * @handler: error handler
96 * @data: data for error handler
97 *
98 * Register a callback function that will be called on errors and
99 * warnings. If handler is NULL, the error handler will be deactivated.
100 *
101 * This is the recommended way to collect errors from the parser and
102 * takes precedence over all other error reporting mechanisms.
103 * These are (in order of precedence):
104 *
105 * - per-context structured handler (xmlCtxtSetErrorHandler)
106 * - per-context structured "serror" SAX handler
107 * - global structured handler (xmlSetStructuredErrorFunc)
108 * - per-context generic "error" and "warning" SAX handlers
109 * - global generic handler (xmlSetGenericErrorFunc)
110 * - print to stderr
111 *
112 * Available since 2.13.0.
113 */
114 void
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt,xmlStructuredErrorFunc handler,void * data)115 xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116 void *data)
117 {
118 if (ctxt == NULL)
119 return;
120 ctxt->errorHandler = handler;
121 ctxt->errorCtxt = data;
122 }
123
124 /**
125 * xmlCtxtErrMemory:
126 * @ctxt: an XML parser context
127 *
128 * Handle an out-of-memory error.
129 *
130 * Available since 2.13.0.
131 */
132 void
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)133 xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
134 {
135 xmlStructuredErrorFunc schannel = NULL;
136 xmlGenericErrorFunc channel = NULL;
137 void *data;
138
139 if (ctxt == NULL)
140 return;
141
142 ctxt->errNo = XML_ERR_NO_MEMORY;
143 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
144 ctxt->wellFormed = 0;
145 ctxt->disableSAX = 2;
146
147 if (ctxt->errorHandler) {
148 schannel = ctxt->errorHandler;
149 data = ctxt->errorCtxt;
150 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
151 (ctxt->sax->serror != NULL)) {
152 schannel = ctxt->sax->serror;
153 data = ctxt->userData;
154 } else {
155 channel = ctxt->sax->error;
156 data = ctxt->userData;
157 }
158
159 xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
160 &ctxt->lastError);
161 }
162
163 /**
164 * xmlCtxtErrIO:
165 * @ctxt: parser context
166 * @code: xmlParserErrors code
167 * @uri: filename or URI (optional)
168 *
169 * If filename is empty, use the one from context input if available.
170 *
171 * Report an IO error to the parser context.
172 */
173 void
xmlCtxtErrIO(xmlParserCtxtPtr ctxt,int code,const char * uri)174 xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
175 {
176 const char *errstr, *msg, *str1, *str2;
177 xmlErrorLevel level;
178
179 if (ctxt == NULL)
180 return;
181
182 /*
183 * Don't report a well-formedness error if an external entity could
184 * not be found. We assume that inputNr is zero for the document
185 * entity which is somewhat fragile.
186 */
187 if ((ctxt->inputNr > 0) &&
188 ((code == XML_IO_ENOENT) ||
189 (code == XML_IO_NETWORK_ATTEMPT) ||
190 (code == XML_IO_UNKNOWN))) {
191 if (ctxt->validate == 0)
192 level = XML_ERR_WARNING;
193 else
194 level = XML_ERR_ERROR;
195 } else {
196 level = XML_ERR_FATAL;
197 }
198
199 errstr = xmlErrString(code);
200
201 if (uri == NULL) {
202 msg = "%s\n";
203 str1 = errstr;
204 str2 = NULL;
205 } else {
206 msg = "failed to load \"%s\": %s\n";
207 str1 = uri;
208 str2 = errstr;
209 }
210
211 xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
212 (const xmlChar *) uri, NULL, NULL, 0,
213 msg, str1, str2);
214 }
215
216 /**
217 * xmlCtxtVErr:
218 * @ctxt: a parser context
219 * @node: the current node or NULL
220 * @domain: the domain for the error
221 * @code: the code for the error
222 * @level: the xmlErrorLevel for the error
223 * @str1: extra string info
224 * @str2: extra string info
225 * @str3: extra string info
226 * @int1: extra int info
227 * @msg: the message to display/transmit
228 * @ap: extra parameters for the message display
229 *
230 * Raise a parser error.
231 */
232 void
xmlCtxtVErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,va_list ap)233 xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
234 xmlParserErrors code, xmlErrorLevel level,
235 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
236 int int1, const char *msg, va_list ap)
237 {
238 xmlStructuredErrorFunc schannel = NULL;
239 xmlGenericErrorFunc channel = NULL;
240 void *data = NULL;
241 const char *file = NULL;
242 int line = 0;
243 int col = 0;
244 int res;
245
246 if (code == XML_ERR_NO_MEMORY) {
247 xmlCtxtErrMemory(ctxt);
248 return;
249 }
250
251 if (PARSER_STOPPED(ctxt))
252 return;
253
254 if (level == XML_ERR_WARNING) {
255 if (ctxt->nbWarnings >= XML_MAX_ERRORS)
256 return;
257 ctxt->nbWarnings += 1;
258 } else {
259 if (ctxt->nbErrors >= XML_MAX_ERRORS)
260 return;
261 ctxt->nbErrors += 1;
262 }
263
264 if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
265 ((level != XML_ERR_WARNING) ||
266 ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
267 if (ctxt->errorHandler) {
268 schannel = ctxt->errorHandler;
269 data = ctxt->errorCtxt;
270 } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
271 (ctxt->sax->serror != NULL)) {
272 schannel = ctxt->sax->serror;
273 data = ctxt->userData;
274 } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
275 if (level == XML_ERR_WARNING)
276 channel = ctxt->vctxt.warning;
277 else
278 channel = ctxt->vctxt.error;
279 data = ctxt->vctxt.userData;
280 } else {
281 if (level == XML_ERR_WARNING)
282 channel = ctxt->sax->warning;
283 else
284 channel = ctxt->sax->error;
285 data = ctxt->userData;
286 }
287 }
288
289 if (ctxt->input != NULL) {
290 xmlParserInputPtr input = ctxt->input;
291
292 if ((input->filename == NULL) &&
293 (ctxt->inputNr > 1)) {
294 input = ctxt->inputTab[ctxt->inputNr - 2];
295 }
296 file = input->filename;
297 line = input->line;
298 col = input->col;
299 }
300
301 res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
302 level, file, line, (const char *) str1,
303 (const char *) str2, (const char *) str3, int1, col,
304 msg, ap);
305
306 if (res < 0) {
307 xmlCtxtErrMemory(ctxt);
308 return;
309 }
310
311 if (level >= XML_ERR_ERROR)
312 ctxt->errNo = code;
313 if (level == XML_ERR_FATAL) {
314 ctxt->wellFormed = 0;
315 if (ctxt->recovery == 0)
316 ctxt->disableSAX = 1;
317 }
318
319 return;
320 }
321
322 /**
323 * xmlCtxtErr:
324 * @ctxt: a parser context
325 * @node: the current node or NULL
326 * @domain: the domain for the error
327 * @code: the code for the error
328 * @level: the xmlErrorLevel for the error
329 * @str1: extra string info
330 * @str2: extra string info
331 * @str3: extra string info
332 * @int1: extra int info
333 * @msg: the message to display/transmit
334 * @...: extra parameters for the message display
335 *
336 * Raise a parser error.
337 */
338 void
xmlCtxtErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,...)339 xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
340 xmlParserErrors code, xmlErrorLevel level,
341 const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
342 int int1, const char *msg, ...)
343 {
344 va_list ap;
345
346 va_start(ap, msg);
347 xmlCtxtVErr(ctxt, node, domain, code, level,
348 str1, str2, str3, int1, msg, ap);
349 va_end(ap);
350 }
351
352 /**
353 * xmlFatalErr:
354 * @ctxt: an XML parser context
355 * @code: the error number
356 * @info: extra information string
357 *
358 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
359 */
360 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors code,const char * info)361 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
362 {
363 const char *errmsg;
364 xmlErrorLevel level;
365
366 if (code == XML_ERR_UNSUPPORTED_ENCODING)
367 level = XML_ERR_WARNING;
368 else
369 level = XML_ERR_FATAL;
370
371 errmsg = xmlErrString(code);
372
373 if (info == NULL) {
374 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
375 NULL, NULL, NULL, 0, "%s\n", errmsg);
376 } else {
377 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
378 (const xmlChar *) info, NULL, NULL, 0,
379 "%s: %s\n", errmsg, info);
380 }
381 }
382
/**
 * xmlIsLetter:
 * @c: a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
396
397 /************************************************************************
398 * *
399 * Input handling functions for progressive parsing *
400 * *
401 ************************************************************************/
402
403 /* we need to keep enough input to show errors in context */
404 #define LINE_LEN 80
405
406 /**
407 * xmlHaltParser:
408 * @ctxt: an XML parser context
409 *
410 * Blocks further parser processing don't override error
411 * for internal use
412 */
413 void
xmlHaltParser(xmlParserCtxtPtr ctxt)414 xmlHaltParser(xmlParserCtxtPtr ctxt) {
415 if (ctxt == NULL)
416 return;
417 ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
418 ctxt->disableSAX = 2;
419 }
420
421 /**
422 * xmlParserInputRead:
423 * @in: an XML parser input
424 * @len: an indicative size for the lookahead
425 *
426 * DEPRECATED: This function was internal and is deprecated.
427 *
428 * Returns -1 as this is an error to use it.
429 */
430 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)431 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
432 return(-1);
433 }
434
435 /**
436 * xmlParserGrow:
437 * @ctxt: an XML parser context
438 *
439 * Grow the input buffer.
440 *
441 * Returns the number of bytes read or -1 in case of error.
442 */
443 int
xmlParserGrow(xmlParserCtxtPtr ctxt)444 xmlParserGrow(xmlParserCtxtPtr ctxt) {
445 xmlParserInputPtr in = ctxt->input;
446 xmlParserInputBufferPtr buf = in->buf;
447 size_t curEnd = in->end - in->cur;
448 size_t curBase = in->cur - in->base;
449 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
450 XML_MAX_HUGE_LENGTH :
451 XML_MAX_LOOKUP_LIMIT;
452 int ret;
453
454 if (buf == NULL)
455 return(0);
456 /* Don't grow push parser buffer. */
457 if (PARSER_PROGRESSIVE(ctxt))
458 return(0);
459 /* Don't grow memory buffers. */
460 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
461 return(0);
462 if (buf->error != 0)
463 return(-1);
464
465 if (curBase > maxLength) {
466 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
467 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
468 xmlHaltParser(ctxt);
469 return(-1);
470 }
471
472 if (curEnd >= INPUT_CHUNK)
473 return(0);
474
475 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
476 xmlBufUpdateInput(buf->buffer, in, curBase);
477
478 if (ret < 0) {
479 xmlCtxtErrIO(ctxt, buf->error, NULL);
480 }
481
482 return(ret);
483 }
484
485 /**
486 * xmlParserInputGrow:
487 * @in: an XML parser input
488 * @len: an indicative size for the lookahead
489 *
490 * DEPRECATED: Don't use.
491 *
492 * This function increase the input for the parser. It tries to
493 * preserve pointers to the input buffer, and keep already read data
494 *
495 * Returns the amount of char read, or -1 in case of error, 0 indicate the
496 * end of this entity
497 */
498 int
xmlParserInputGrow(xmlParserInputPtr in,int len)499 xmlParserInputGrow(xmlParserInputPtr in, int len) {
500 int ret;
501 size_t indx;
502
503 if ((in == NULL) || (len < 0)) return(-1);
504 if (in->buf == NULL) return(-1);
505 if (in->base == NULL) return(-1);
506 if (in->cur == NULL) return(-1);
507 if (in->buf->buffer == NULL) return(-1);
508
509 /* Don't grow memory buffers. */
510 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
511 return(0);
512
513 indx = in->cur - in->base;
514 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
515 return(0);
516 }
517 ret = xmlParserInputBufferGrow(in->buf, len);
518
519 in->base = xmlBufContent(in->buf->buffer);
520 if (in->base == NULL) {
521 in->base = BAD_CAST "";
522 in->cur = in->base;
523 in->end = in->base;
524 return(-1);
525 }
526 in->cur = in->base + indx;
527 in->end = xmlBufEnd(in->buf->buffer);
528
529 return(ret);
530 }
531
532 /**
533 * xmlParserShrink:
534 * @ctxt: an XML parser context
535 *
536 * Shrink the input buffer.
537 */
538 void
xmlParserShrink(xmlParserCtxtPtr ctxt)539 xmlParserShrink(xmlParserCtxtPtr ctxt) {
540 xmlParserInputPtr in = ctxt->input;
541 xmlParserInputBufferPtr buf = in->buf;
542 size_t used;
543
544 if (buf == NULL)
545 return;
546 /* Don't shrink pull parser memory buffers. */
547 if ((!PARSER_PROGRESSIVE(ctxt)) &&
548 (buf->encoder == NULL) &&
549 (buf->readcallback == NULL))
550 return;
551
552 used = in->cur - in->base;
553 /*
554 * Do not shrink on large buffers whose only a tiny fraction
555 * was consumed
556 */
557 if (used > INPUT_CHUNK) {
558 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
559
560 if (res > 0) {
561 used -= res;
562 if ((res > ULONG_MAX) ||
563 (in->consumed > ULONG_MAX - (unsigned long)res))
564 in->consumed = ULONG_MAX;
565 else
566 in->consumed += res;
567 }
568 }
569
570 xmlBufUpdateInput(buf->buffer, in, used);
571 }
572
573 /**
574 * xmlParserInputShrink:
575 * @in: an XML parser input
576 *
577 * DEPRECATED: Don't use.
578 *
579 * This function removes used input for the parser.
580 */
581 void
xmlParserInputShrink(xmlParserInputPtr in)582 xmlParserInputShrink(xmlParserInputPtr in) {
583 size_t used;
584 size_t ret;
585
586 if (in == NULL) return;
587 if (in->buf == NULL) return;
588 if (in->base == NULL) return;
589 if (in->cur == NULL) return;
590 if (in->buf->buffer == NULL) return;
591
592 used = in->cur - in->base;
593 /*
594 * Do not shrink on large buffers whose only a tiny fraction
595 * was consumed
596 */
597 if (used > INPUT_CHUNK) {
598 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
599 if (ret > 0) {
600 used -= ret;
601 if ((ret > ULONG_MAX) ||
602 (in->consumed > ULONG_MAX - (unsigned long)ret))
603 in->consumed = ULONG_MAX;
604 else
605 in->consumed += ret;
606 }
607 }
608
609 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
610 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
611 }
612
613 in->base = xmlBufContent(in->buf->buffer);
614 if (in->base == NULL) {
615 /* TODO: raise error */
616 in->base = BAD_CAST "";
617 in->cur = in->base;
618 in->end = in->base;
619 return;
620 }
621 in->cur = in->base + used;
622 in->end = xmlBufEnd(in->buf->buffer);
623 }
624
625 /************************************************************************
626 * *
627 * UTF8 character input and related functions *
628 * *
629 ************************************************************************/
630
631 /**
632 * xmlNextChar:
633 * @ctxt: the XML parser context
634 *
635 * DEPRECATED: Internal function, do not use.
636 *
637 * Skip to the next char input char.
638 */
639
640 void
xmlNextChar(xmlParserCtxtPtr ctxt)641 xmlNextChar(xmlParserCtxtPtr ctxt)
642 {
643 const unsigned char *cur;
644 size_t avail;
645 int c;
646
647 if ((ctxt == NULL) || (ctxt->input == NULL))
648 return;
649
650 avail = ctxt->input->end - ctxt->input->cur;
651
652 if (avail < INPUT_CHUNK) {
653 xmlParserGrow(ctxt);
654 if (ctxt->input->cur >= ctxt->input->end)
655 return;
656 avail = ctxt->input->end - ctxt->input->cur;
657 }
658
659 cur = ctxt->input->cur;
660 c = *cur;
661
662 if (c < 0x80) {
663 if (c == '\n') {
664 ctxt->input->cur++;
665 ctxt->input->line++;
666 ctxt->input->col = 1;
667 } else if (c == '\r') {
668 /*
669 * 2.11 End-of-Line Handling
670 * the literal two-character sequence "#xD#xA" or a standalone
671 * literal #xD, an XML processor must pass to the application
672 * the single character #xA.
673 */
674 ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
675 ctxt->input->line++;
676 ctxt->input->col = 1;
677 return;
678 } else {
679 ctxt->input->cur++;
680 ctxt->input->col++;
681 }
682 } else {
683 ctxt->input->col++;
684
685 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
686 goto encoding_error;
687
688 if (c < 0xe0) {
689 /* 2-byte code */
690 if (c < 0xc2)
691 goto encoding_error;
692 ctxt->input->cur += 2;
693 } else {
694 unsigned int val = (c << 8) | cur[1];
695
696 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
697 goto encoding_error;
698
699 if (c < 0xf0) {
700 /* 3-byte code */
701 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
702 goto encoding_error;
703 ctxt->input->cur += 3;
704 } else {
705 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
706 goto encoding_error;
707
708 /* 4-byte code */
709 if ((val < 0xf090) || (val >= 0xf490))
710 goto encoding_error;
711 ctxt->input->cur += 4;
712 }
713 }
714 }
715
716 return;
717
718 encoding_error:
719 /* Only report the first error */
720 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
721 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
722 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
723 }
724 ctxt->input->cur++;
725 return;
726 }
727
728 /**
729 * xmlCurrentChar:
730 * @ctxt: the XML parser context
731 * @len: pointer to the length of the char read
732 *
733 * DEPRECATED: Internal function, do not use.
734 *
735 * The current char value, if using UTF-8 this may actually span multiple
736 * bytes in the input buffer. Implement the end of line normalization:
737 * 2.11 End-of-Line Handling
738 * Wherever an external parsed entity or the literal entity value
739 * of an internal parsed entity contains either the literal two-character
740 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
741 * must pass to the application the single character #xA.
742 * This behavior can conveniently be produced by normalizing all
743 * line breaks to #xA on input, before parsing.)
744 *
745 * Returns the current char value and its length
746 */
747
748 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)749 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
750 const unsigned char *cur;
751 size_t avail;
752 int c;
753
754 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
755
756 avail = ctxt->input->end - ctxt->input->cur;
757
758 if (avail < INPUT_CHUNK) {
759 xmlParserGrow(ctxt);
760 avail = ctxt->input->end - ctxt->input->cur;
761 }
762
763 cur = ctxt->input->cur;
764 c = *cur;
765
766 if (c < 0x80) {
767 /* 1-byte code */
768 if (c < 0x20) {
769 /*
770 * 2.11 End-of-Line Handling
771 * the literal two-character sequence "#xD#xA" or a standalone
772 * literal #xD, an XML processor must pass to the application
773 * the single character #xA.
774 */
775 if (c == '\r') {
776 /*
777 * TODO: This function shouldn't change the 'cur' pointer
778 * as side effect, but the NEXTL macro in parser.c relies
779 * on this behavior when incrementing line numbers.
780 */
781 if (cur[1] == '\n')
782 ctxt->input->cur++;
783 *len = 1;
784 c = '\n';
785 } else if (c == 0) {
786 if (ctxt->input->cur >= ctxt->input->end) {
787 *len = 0;
788 } else {
789 *len = 1;
790 /*
791 * TODO: Null bytes should be handled by callers,
792 * but this can be tricky.
793 */
794 xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
795 "Char 0x0 out of allowed range\n");
796 }
797 } else {
798 *len = 1;
799 }
800 } else {
801 *len = 1;
802 }
803
804 return(c);
805 } else {
806 int val;
807
808 if (avail < 2)
809 goto incomplete_sequence;
810 if ((cur[1] & 0xc0) != 0x80)
811 goto encoding_error;
812
813 if (c < 0xe0) {
814 /* 2-byte code */
815 if (c < 0xc2)
816 goto encoding_error;
817 val = (c & 0x1f) << 6;
818 val |= cur[1] & 0x3f;
819 *len = 2;
820 } else {
821 if (avail < 3)
822 goto incomplete_sequence;
823 if ((cur[2] & 0xc0) != 0x80)
824 goto encoding_error;
825
826 if (c < 0xf0) {
827 /* 3-byte code */
828 val = (c & 0xf) << 12;
829 val |= (cur[1] & 0x3f) << 6;
830 val |= cur[2] & 0x3f;
831 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
832 goto encoding_error;
833 *len = 3;
834 } else {
835 if (avail < 4)
836 goto incomplete_sequence;
837 if ((cur[3] & 0xc0) != 0x80)
838 goto encoding_error;
839
840 /* 4-byte code */
841 val = (c & 0x0f) << 18;
842 val |= (cur[1] & 0x3f) << 12;
843 val |= (cur[2] & 0x3f) << 6;
844 val |= cur[3] & 0x3f;
845 if ((val < 0x10000) || (val >= 0x110000))
846 goto encoding_error;
847 *len = 4;
848 }
849 }
850
851 return(val);
852 }
853
854 encoding_error:
855 /* Only report the first error */
856 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
857 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
858 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
859 }
860 *len = 1;
861 return(0xFFFD); /* U+FFFD Replacement Character */
862
863 incomplete_sequence:
864 /*
865 * An encoding problem may arise from a truncated input buffer
866 * splitting a character in the middle. In that case do not raise
867 * an error but return 0. This should only happen when push parsing
868 * char data.
869 */
870 *len = 0;
871 return(0);
872 }
873
874 /**
875 * xmlStringCurrentChar:
876 * @ctxt: the XML parser context
877 * @cur: pointer to the beginning of the char
878 * @len: pointer to the length of the char read
879 *
880 * DEPRECATED: Internal function, do not use.
881 *
882 * The current char value, if using UTF-8 this may actually span multiple
883 * bytes in the input buffer.
884 *
885 * Returns the current char value and its length
886 */
887
888 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)889 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
890 const xmlChar *cur, int *len) {
891 int c;
892
893 if ((cur == NULL) || (len == NULL))
894 return(0);
895
896 /* cur is zero-terminated, so we can lie about its length. */
897 *len = 4;
898 c = xmlGetUTF8Char(cur, len);
899
900 return((c < 0) ? 0 : c);
901 }
902
/**
 * xmlCopyCharMultiByte:
 * @out: pointer to an array of xmlChar
 * @val: the char value
 *
 * Append the char value to the array, encoded as UTF-8:
 *
 *   value range          bytes written
 *   0x00     - 0x7F      0xxxxxxx
 *   0x80     - 0x7FF     110xxxxx 10xxxxxx
 *   0x800    - 0xFFFF    1110xxxx 10xxxxxx 10xxxxxx
 *   0x10000  - 0x10FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * No terminator is written.
 *
 * Returns the number of xmlChar written, or 0 if @out is NULL or
 * @val is negative or 0x110000 and above.
 */
int
xmlCopyCharMultiByte(xmlChar *out, int val) {
    xmlChar *start = out;
    int shift;

    if ((out == NULL) || (val < 0))
        return(0);

    if (val < 0x80) {
        *out = val;
        return(1);
    }

    /* Lead byte; shift is the bit offset of the first trailing byte. */
    if (val < 0x800) {
        *out++ = (val >> 6) | 0xC0;
        shift = 0;
    } else if (val < 0x10000) {
        *out++ = (val >> 12) | 0xE0;
        shift = 6;
    } else if (val < 0x110000) {
        *out++ = (val >> 18) | 0xF0;
        shift = 12;
    } else {
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        fprintf(stderr, "xmlCopyCharMultiByte: codepoint out of range\n");
        abort();
#endif
        return(0);
    }

    /* Trailing bytes, six payload bits each. */
    while (shift >= 0) {
        *out++ = ((val >> shift) & 0x3F) | 0x80;
        shift -= 6;
    }

    return(out - start);
}
944
945 /**
946 * xmlCopyChar:
947 * @len: Ignored, compatibility
948 * @out: pointer to an array of xmlChar
949 * @val: the char value
950 *
951 * append the char value in the array
952 *
953 * Returns the number of xmlChar written
954 */
955
956 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)957 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
958 if ((out == NULL) || (val < 0)) return(0);
959 /* the len parameter is ignored */
960 if (val >= 0x80) {
961 return(xmlCopyCharMultiByte (out, val));
962 }
963 *out = val;
964 return 1;
965 }
966
967 /************************************************************************
968 * *
969 * Commodity functions to switch encodings *
970 * *
971 ************************************************************************/
972
973 static int
xmlDetectEBCDIC(xmlParserInputPtr input,xmlCharEncodingHandlerPtr * hout)974 xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
975 xmlChar out[200];
976 xmlCharEncodingHandlerPtr handler;
977 int inlen, outlen, res, i;
978
979 *hout = NULL;
980
981 /*
982 * To detect the EBCDIC code page, we convert the first 200 bytes
983 * to EBCDIC-US and try to find the encoding declaration.
984 */
985 res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
986 if (res != 0)
987 return(res);
988 outlen = sizeof(out) - 1;
989 inlen = input->end - input->cur;
990 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
991 /*
992 * Return the EBCDIC handler if decoding failed. The error will
993 * be reported later.
994 */
995 if (res < 0)
996 goto done;
997 out[outlen] = 0;
998
999 for (i = 0; i < outlen; i++) {
1000 if (out[i] == '>')
1001 break;
1002 if ((out[i] == 'e') &&
1003 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1004 int start, cur, quote;
1005
1006 i += 8;
1007 while (IS_BLANK_CH(out[i]))
1008 i += 1;
1009 if (out[i++] != '=')
1010 break;
1011 while (IS_BLANK_CH(out[i]))
1012 i += 1;
1013 quote = out[i++];
1014 if ((quote != '\'') && (quote != '"'))
1015 break;
1016 start = i;
1017 cur = out[i];
1018 while (((cur >= 'a') && (cur <= 'z')) ||
1019 ((cur >= 'A') && (cur <= 'Z')) ||
1020 ((cur >= '0') && (cur <= '9')) ||
1021 (cur == '.') || (cur == '_') ||
1022 (cur == '-'))
1023 cur = out[++i];
1024 if (cur != quote)
1025 break;
1026 out[i] = 0;
1027 xmlCharEncCloseFunc(handler);
1028 res = xmlOpenCharEncodingHandler((char *) out + start,
1029 /* output */ 0, &handler);
1030 if (res != 0)
1031 return(res);
1032 *hout = handler;
1033 return(0);
1034 }
1035 }
1036
1037 done:
1038 /*
1039 * Encoding handlers are stateful, so we have to recreate them.
1040 */
1041 xmlCharEncCloseFunc(handler);
1042 res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
1043 if (res != 0)
1044 return(res);
1045 *hout = handler;
1046 return(0);
1047 }
1048
1049 /**
1050 * xmlSwitchEncoding:
1051 * @ctxt: the parser context
1052 * @enc: the encoding value (number)
1053 *
1054 * Use encoding specified by enum to decode input data. This overrides
1055 * the encoding found in the XML declaration.
1056 *
1057 * This function can also be used to override the encoding of chunks
1058 * passed to xmlParseChunk.
1059 *
1060 * Returns 0 in case of success, -1 otherwise
1061 */
1062 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1063 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1064 {
1065 xmlCharEncodingHandlerPtr handler = NULL;
1066 int ret;
1067 int res;
1068
1069 if ((ctxt == NULL) || (ctxt->input == NULL))
1070 return(-1);
1071
1072 switch (enc) {
1073 case XML_CHAR_ENCODING_NONE:
1074 case XML_CHAR_ENCODING_UTF8:
1075 case XML_CHAR_ENCODING_ASCII:
1076 res = 0;
1077 break;
1078 case XML_CHAR_ENCODING_EBCDIC:
1079 res = xmlDetectEBCDIC(ctxt->input, &handler);
1080 break;
1081 default:
1082 res = xmlLookupCharEncodingHandler(enc, &handler);
1083 break;
1084 }
1085
1086 if (res != 0) {
1087 const char *name = xmlGetCharEncodingName(enc);
1088
1089 xmlFatalErr(ctxt, res, (name ? name : "<null>"));
1090 return(-1);
1091 }
1092
1093 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1094
1095 if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1096 ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1097 }
1098
1099 return(ret);
1100 }
1101
1102 /**
1103 * xmlSwitchInputEncodingName:
1104 * @ctxt: the parser context, only for error reporting
1105 * @input: the input strea,
1106 * @encoding: the encoding name
1107 *
1108 * Returns 0 in case of success, -1 otherwise
1109 */
1110 static int
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,const char * encoding)1111 xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1112 const char *encoding) {
1113 xmlCharEncodingHandlerPtr handler;
1114 int res;
1115
1116 if (encoding == NULL)
1117 return(-1);
1118
1119 res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1120 if (res != 0) {
1121 xmlFatalErr(ctxt, res, encoding);
1122 return(-1);
1123 }
1124
1125 return(xmlSwitchInputEncoding(ctxt, input, handler));
1126 }
1127
1128 /**
1129 * xmlSwitchEncodingName:
1130 * @ctxt: the parser context
1131 * @encoding: the encoding name
1132 *
1133 * Use specified encoding to decode input data. This overrides the
1134 * encoding found in the XML declaration.
1135 *
1136 * This function can also be used to override the encoding of chunks
1137 * passed to xmlParseChunk.
1138 *
1139 * Available since 2.13.0.
1140 *
1141 * Returns 0 in case of success, -1 otherwise
1142 */
1143 int
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt,const char * encoding)1144 xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1145 if (ctxt == NULL)
1146 return(-1);
1147
1148 return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1149 }
1150
1151 /**
1152 * xmlInputSetEncodingHandler:
1153 * @input: the input stream
1154 * @handler: the encoding handler
1155 *
1156 * Use encoding handler to decode input data.
1157 *
1158 * Closes the handler on error.
1159 *
1160 * Returns an xmlParserErrors code.
1161 */
1162 static int
xmlInputSetEncodingHandler(xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1163 xmlInputSetEncodingHandler(xmlParserInputPtr input,
1164 xmlCharEncodingHandlerPtr handler) {
1165 int nbchars;
1166 xmlParserInputBufferPtr in;
1167
1168 if ((input == NULL) || (input->buf == NULL)) {
1169 xmlCharEncCloseFunc(handler);
1170 return(XML_ERR_ARGUMENT);
1171 }
1172 in = input->buf;
1173
1174 input->flags |= XML_INPUT_HAS_ENCODING;
1175
1176 /*
1177 * UTF-8 requires no encoding handler.
1178 */
1179 if ((handler != NULL) &&
1180 (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1181 xmlCharEncCloseFunc(handler);
1182 handler = NULL;
1183 }
1184
1185 if (in->encoder == handler)
1186 return(XML_ERR_OK);
1187
1188 if (in->encoder != NULL) {
1189 /*
1190 * Switching encodings during parsing is a really bad idea,
1191 * but Chromium can switch between ISO-8859-1 and UTF-16 before
1192 * separate calls to xmlParseChunk.
1193 *
1194 * TODO: We should check whether the "raw" input buffer is empty and
1195 * convert the old content using the old encoder.
1196 */
1197
1198 xmlCharEncCloseFunc(in->encoder);
1199 in->encoder = handler;
1200 return(XML_ERR_OK);
1201 }
1202
1203 in->encoder = handler;
1204
1205 /*
1206 * Is there already some content down the pipe to convert ?
1207 */
1208 if (xmlBufIsEmpty(in->buffer) == 0) {
1209 xmlBufPtr buf;
1210 size_t processed;
1211
1212 buf = xmlBufCreate();
1213 if (buf == NULL)
1214 return(XML_ERR_NO_MEMORY);
1215
1216 /*
1217 * Shrink the current input buffer.
1218 * Move it as the raw buffer and create a new input buffer
1219 */
1220 processed = input->cur - input->base;
1221 xmlBufShrink(in->buffer, processed);
1222 input->consumed += processed;
1223 in->raw = in->buffer;
1224 in->buffer = buf;
1225 in->rawconsumed = processed;
1226
1227 nbchars = xmlCharEncInput(in);
1228 xmlBufResetInput(in->buffer, input);
1229 if (nbchars < 0)
1230 return(in->error);
1231 }
1232
1233 return(XML_ERR_OK);
1234 }
1235
1236 /**
1237 * xmlInputSetEncoding:
1238 * @input: the input stream
1239 * @encoding: the encoding name
1240 *
1241 * Use specified encoding to decode input data. This overrides the
1242 * encoding found in the XML declaration.
1243 *
1244 * Available since 2.14.0.
1245 *
1246 * Returns an xmlParserErrors code.
1247 */
1248 int
xmlInputSetEncoding(xmlParserInputPtr input,const char * encoding)1249 xmlInputSetEncoding(xmlParserInputPtr input, const char *encoding) {
1250 xmlCharEncodingHandlerPtr handler;
1251 int res;
1252
1253 if (encoding == NULL)
1254 return(XML_ERR_ARGUMENT);
1255
1256 res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1257 if (res != 0)
1258 return(res);
1259
1260 return(xmlInputSetEncodingHandler(input, handler));
1261 }
1262
1263 /**
1264 * xmlSwitchInputEncoding:
1265 * @ctxt: the parser context, only for error reporting
1266 * @input: the input stream
1267 * @handler: the encoding handler
1268 *
1269 * DEPRECATED: Internal function, don't use.
1270 *
1271 * Use encoding handler to decode input data.
1272 *
1273 * Returns 0 in case of success, -1 otherwise
1274 */
1275 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1276 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1277 xmlCharEncodingHandlerPtr handler) {
1278 int code = xmlInputSetEncodingHandler(input, handler);
1279
1280 if (code != XML_ERR_OK) {
1281 xmlCtxtErrIO(ctxt, code, NULL);
1282 return(-1);
1283 }
1284
1285 return(0);
1286 }
1287
1288 /**
1289 * xmlSwitchToEncoding:
1290 * @ctxt: the parser context
1291 * @handler: the encoding handler
1292 *
1293 * Use encoding handler to decode input data.
1294 *
1295 * This function can be used to enforce the encoding of chunks passed
1296 * to xmlParseChunk.
1297 *
1298 * Returns 0 in case of success, -1 otherwise
1299 */
1300 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1301 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1302 {
1303 if (ctxt == NULL)
1304 return(-1);
1305 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1306 }
1307
1308 /**
1309 * xmlDetectEncoding:
1310 * @ctxt: the parser context
1311 *
1312 * Handle optional BOM, detect and switch to encoding.
1313 *
1314 * Assumes that there are at least four bytes in the input buffer.
1315 */
1316 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1317 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1318 const xmlChar *in;
1319 xmlCharEncoding enc;
1320 int bomSize;
1321 int autoFlag = 0;
1322
1323 if (xmlParserGrow(ctxt) < 0)
1324 return;
1325 in = ctxt->input->cur;
1326 if (ctxt->input->end - in < 4)
1327 return;
1328
1329 if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1330 /*
1331 * If the encoding was already set, only skip the BOM which was
1332 * possibly decoded to UTF-8.
1333 */
1334 if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1335 ctxt->input->cur += 3;
1336 }
1337
1338 return;
1339 }
1340
1341 enc = XML_CHAR_ENCODING_NONE;
1342 bomSize = 0;
1343
1344 switch (in[0]) {
1345 case 0x00:
1346 if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1347 enc = XML_CHAR_ENCODING_UCS4BE;
1348 autoFlag = XML_INPUT_AUTO_OTHER;
1349 } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1350 enc = XML_CHAR_ENCODING_UTF16BE;
1351 autoFlag = XML_INPUT_AUTO_UTF16BE;
1352 }
1353 break;
1354
1355 case 0x3C:
1356 if (in[1] == 0x00) {
1357 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1358 enc = XML_CHAR_ENCODING_UCS4LE;
1359 autoFlag = XML_INPUT_AUTO_OTHER;
1360 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1361 enc = XML_CHAR_ENCODING_UTF16LE;
1362 autoFlag = XML_INPUT_AUTO_UTF16LE;
1363 }
1364 }
1365 break;
1366
1367 case 0x4C:
1368 if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1369 enc = XML_CHAR_ENCODING_EBCDIC;
1370 autoFlag = XML_INPUT_AUTO_OTHER;
1371 }
1372 break;
1373
1374 case 0xEF:
1375 if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1376 enc = XML_CHAR_ENCODING_UTF8;
1377 autoFlag = XML_INPUT_AUTO_UTF8;
1378 bomSize = 3;
1379 }
1380 break;
1381
1382 case 0xFE:
1383 if (in[1] == 0xFF) {
1384 enc = XML_CHAR_ENCODING_UTF16BE;
1385 autoFlag = XML_INPUT_AUTO_UTF16BE;
1386 bomSize = 2;
1387 }
1388 break;
1389
1390 case 0xFF:
1391 if (in[1] == 0xFE) {
1392 enc = XML_CHAR_ENCODING_UTF16LE;
1393 autoFlag = XML_INPUT_AUTO_UTF16LE;
1394 bomSize = 2;
1395 }
1396 break;
1397 }
1398
1399 if (bomSize > 0) {
1400 ctxt->input->cur += bomSize;
1401 }
1402
1403 if (enc != XML_CHAR_ENCODING_NONE) {
1404 ctxt->input->flags |= autoFlag;
1405 xmlSwitchEncoding(ctxt, enc);
1406 }
1407 }
1408
1409 /**
1410 * xmlSetDeclaredEncoding:
1411 * @ctxt: the parser context
1412 * @encoding: declared encoding
1413 *
1414 * Set the encoding from a declaration in the document.
1415 *
1416 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1417 * about encoding mismatches.
1418 *
1419 * Takes ownership of 'encoding'.
1420 */
1421 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1422 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1423 if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1424 ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1425 xmlSwitchEncodingName(ctxt, (const char *) encoding);
1426 ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1427 } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1428 static const char *allowedUTF8[] = {
1429 "UTF-8", "UTF8", NULL
1430 };
1431 static const char *allowedUTF16LE[] = {
1432 "UTF-16", "UTF-16LE", "UTF16", NULL
1433 };
1434 static const char *allowedUTF16BE[] = {
1435 "UTF-16", "UTF-16BE", "UTF16", NULL
1436 };
1437 const char **allowed = NULL;
1438 const char *autoEnc = NULL;
1439
1440 switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1441 case XML_INPUT_AUTO_UTF8:
1442 allowed = allowedUTF8;
1443 autoEnc = "UTF-8";
1444 break;
1445 case XML_INPUT_AUTO_UTF16LE:
1446 allowed = allowedUTF16LE;
1447 autoEnc = "UTF-16LE";
1448 break;
1449 case XML_INPUT_AUTO_UTF16BE:
1450 allowed = allowedUTF16BE;
1451 autoEnc = "UTF-16BE";
1452 break;
1453 }
1454
1455 if (allowed != NULL) {
1456 const char **p;
1457 int match = 0;
1458
1459 for (p = allowed; *p != NULL; p++) {
1460 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1461 match = 1;
1462 break;
1463 }
1464 }
1465
1466 if (match == 0) {
1467 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1468 "Encoding '%s' doesn't match "
1469 "auto-detected '%s'\n",
1470 encoding, BAD_CAST autoEnc);
1471 xmlFree(encoding);
1472 encoding = xmlStrdup(BAD_CAST autoEnc);
1473 if (encoding == NULL)
1474 xmlCtxtErrMemory(ctxt);
1475 }
1476 }
1477 }
1478
1479 if (ctxt->encoding != NULL)
1480 xmlFree((xmlChar *) ctxt->encoding);
1481 ctxt->encoding = encoding;
1482 }
1483
1484 /**
1485 * xmlGetActualEncoding:
1486 * @ctxt: the parser context
1487 *
1488 * Returns the actual used to parse the document. This can differ from
1489 * the declared encoding.
1490 */
1491 const xmlChar *
xmlGetActualEncoding(xmlParserCtxtPtr ctxt)1492 xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1493 const xmlChar *encoding = NULL;
1494
1495 if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1496 (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1497 /* Preserve encoding exactly */
1498 encoding = ctxt->encoding;
1499 } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1500 encoding = BAD_CAST ctxt->input->buf->encoder->name;
1501 } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1502 encoding = BAD_CAST "UTF-8";
1503 }
1504
1505 return(encoding);
1506 }
1507
1508 /************************************************************************
1509 * *
1510 * Commodity functions to handle entities processing *
1511 * *
1512 ************************************************************************/
1513
1514 /**
1515 * xmlFreeInputStream:
1516 * @input: an xmlParserInputPtr
1517 *
1518 * Free up an input stream.
1519 */
1520 void
xmlFreeInputStream(xmlParserInputPtr input)1521 xmlFreeInputStream(xmlParserInputPtr input) {
1522 if (input == NULL) return;
1523
1524 if (input->filename != NULL) xmlFree((char *) input->filename);
1525 if (input->version != NULL) xmlFree((char *) input->version);
1526 if ((input->free != NULL) && (input->base != NULL))
1527 input->free((xmlChar *) input->base);
1528 if (input->buf != NULL)
1529 xmlFreeParserInputBuffer(input->buf);
1530 xmlFree(input);
1531 }
1532
1533 /**
1534 * xmlNewInputStream:
1535 * @ctxt: an XML parser context
1536 *
1537 * Create a new input stream structure.
1538 *
1539 * Returns the new input stream or NULL
1540 */
1541 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1542 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1543 xmlParserInputPtr input;
1544
1545 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1546 if (input == NULL) {
1547 xmlCtxtErrMemory(ctxt);
1548 return(NULL);
1549 }
1550 memset(input, 0, sizeof(xmlParserInput));
1551 input->line = 1;
1552 input->col = 1;
1553
1554 return(input);
1555 }
1556
1557 /**
1558 * xmlNewInputURL:
1559 * @ctxt: parser context
1560 * @url: filename or URL
1561 * @publicId: publid ID from doctype (optional)
1562 * @encoding: character encoding (optional)
1563 * @flags: unused, pass 0
1564 *
1565 * Creates a new parser input from the filesystem, the network or
1566 * a user-defined resource loader.
1567 *
1568 * Returns a new parser input.
1569 */
1570 xmlParserInputPtr
xmlNewInputURL(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,const char * encoding,int flags ATTRIBUTE_UNUSED)1571 xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
1572 const char *encoding, int flags ATTRIBUTE_UNUSED) {
1573 xmlParserInputPtr input;
1574
1575 if ((ctxt == NULL) || (url == NULL))
1576 return(NULL);
1577
1578 input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1579 if (input == NULL)
1580 return(NULL);
1581
1582 if (encoding != NULL)
1583 xmlSwitchInputEncodingName(ctxt, input, encoding);
1584
1585 return(input);
1586 }
1587
1588 /**
1589 * xmlNewInputInternal:
1590 * @buf: parser input buffer
1591 * @filename: filename or URL
1592 *
1593 * Internal helper function.
1594 *
1595 * Returns a new parser input.
1596 */
1597 static xmlParserInputPtr
xmlNewInputInternal(xmlParserInputBufferPtr buf,const char * filename)1598 xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1599 xmlParserInputPtr input;
1600
1601 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1602 if (input == NULL) {
1603 xmlFreeParserInputBuffer(buf);
1604 return(NULL);
1605 }
1606 memset(input, 0, sizeof(xmlParserInput));
1607 input->line = 1;
1608 input->col = 1;
1609
1610 input->buf = buf;
1611 xmlBufResetInput(input->buf->buffer, input);
1612
1613 if (filename != NULL) {
1614 input->filename = xmlMemStrdup(filename);
1615 if (input->filename == NULL) {
1616 xmlFreeInputStream(input);
1617 return(NULL);
1618 }
1619 }
1620
1621 return(input);
1622 }
1623
1624 /**
1625 * xmlInputCreateMemory:
1626 * @url: base URL (optional)
1627 * @mem: pointer to char array
1628 * @size: size of array
1629 * @flags: optimization hints
1630 *
1631 * Creates a new parser input to read from a memory area.
1632 *
1633 * @url is used as base to resolve external entities and for
1634 * error reporting.
1635 *
1636 * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1637 * stay unchanged until parsing has finished. This can avoid
1638 * temporary copies.
1639 *
1640 * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1641 * area must contain a zero byte after the buffer at position @size.
1642 * This can avoid temporary copies.
1643 *
1644 * Available since 2.14.0.
1645 *
1646 * Returns a new parser input or NULL if a memory allocation failed.
1647 */
1648 xmlParserInputPtr
xmlInputCreateMemory(const char * url,const void * mem,size_t size,int flags)1649 xmlInputCreateMemory(const char *url, const void *mem, size_t size,
1650 int flags) {
1651 xmlParserInputBufferPtr buf;
1652
1653 if (mem == NULL)
1654 return(NULL);
1655
1656 buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1657 if (buf == NULL)
1658 return(NULL);
1659
1660 return(xmlNewInputInternal(buf, url));
1661 }
1662
1663 /**
1664 * xmlNewInputMemory:
1665 * @ctxt: parser context
1666 * @url: base URL (optional)
1667 * @mem: pointer to char array
1668 * @size: size of array
1669 * @encoding: character encoding (optional)
1670 * @flags: optimization hints
1671 *
1672 * Returns a new parser input or NULL in case of error.
1673 */
1674 xmlParserInputPtr
xmlNewInputMemory(xmlParserCtxtPtr ctxt,const char * url,const void * mem,size_t size,const char * encoding,int flags)1675 xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url,
1676 const void *mem, size_t size,
1677 const char *encoding, int flags) {
1678 xmlParserInputPtr input;
1679
1680 if ((ctxt == NULL) || (mem == NULL))
1681 return(NULL);
1682
1683 input = xmlInputCreateMemory(url, mem, size, flags);
1684 if (input == NULL) {
1685 xmlCtxtErrMemory(ctxt);
1686 return(NULL);
1687 }
1688
1689 if (encoding != NULL)
1690 xmlSwitchInputEncodingName(ctxt, input, encoding);
1691
1692 return(input);
1693 }
1694
1695 /**
1696 * xmlInputCreateString:
1697 * @url: base URL (optional)
1698 * @str: zero-terminated string
1699 * @flags: optimization hints
1700 *
1701 * Creates a new parser input to read from a zero-terminated string.
1702 *
1703 * @url is used as base to resolve external entities and for
1704 * error reporting.
1705 *
1706 * If the XML_INPUT_BUF_STATIC flag is set, the string must
1707 * stay unchanged until parsing has finished. This can avoid
1708 * temporary copies.
1709 *
1710 * Available since 2.14.0.
1711 *
1712 * Returns a new parser input or NULL if a memory allocation failed.
1713 */
1714 xmlParserInputPtr
xmlInputCreateString(const char * url,const char * str,int flags)1715 xmlInputCreateString(const char *url, const char *str, int flags) {
1716 xmlParserInputBufferPtr buf;
1717
1718 if (str == NULL)
1719 return(NULL);
1720
1721 buf = xmlNewInputBufferString(str, flags);
1722 if (buf == NULL)
1723 return(NULL);
1724
1725 return(xmlNewInputInternal(buf, url));
1726 }
1727
1728 /**
1729 * xmlNewInputString:
1730 * @ctxt: parser context
1731 * @url: base URL (optional)
1732 * @str: zero-terminated string
1733 * @encoding: character encoding (optional)
1734 * @flags: optimization hints
1735 *
1736 * Returns a new parser input.
1737 */
1738 xmlParserInputPtr
xmlNewInputString(xmlParserCtxtPtr ctxt,const char * url,const char * str,const char * encoding,int flags)1739 xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
1740 const char *str, const char *encoding, int flags) {
1741 xmlParserInputPtr input;
1742
1743 if ((ctxt == NULL) || (str == NULL))
1744 return(NULL);
1745
1746 input = xmlInputCreateString(url, str, flags);
1747 if (input == NULL) {
1748 xmlCtxtErrMemory(ctxt);
1749 return(NULL);
1750 }
1751
1752 if (encoding != NULL)
1753 xmlSwitchInputEncodingName(ctxt, input, encoding);
1754
1755 return(input);
1756 }
1757
1758 /**
1759 * xmlInputCreateFd:
1760 * @url: base URL (optional)
1761 * @fd: file descriptor
1762 * @flags: unused, pass 0
1763 *
1764 * Creates a new parser input to read from a zero-terminated string.
1765 *
1766 * @url is used as base to resolve external entities and for
1767 * error reporting.
1768 *
1769 * @fd is closed after parsing has finished.
1770 *
1771 * Available since 2.14.0.
1772 *
1773 * Returns a new parser input or NULL if a memory allocation failed.
1774 */
1775 xmlParserInputPtr
xmlInputCreateFd(const char * url,int fd,int flags ATTRIBUTE_UNUSED)1776 xmlInputCreateFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
1777 xmlParserInputBufferPtr buf;
1778
1779 if (fd < 0)
1780 return(NULL);
1781
1782 buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1783 if (buf == NULL)
1784 return(NULL);
1785
1786 return(xmlNewInputInternal(buf, url));
1787 }
1788
1789 /**
1790 * xmlNewInputFd:
1791 * @ctxt: parser context
1792 * @url: base URL (optional)
1793 * @fd: file descriptor
1794 * @encoding: character encoding (optional)
1795 * @flags: unused, pass 0
1796 *
1797 * Returns a new parser input.
1798 */
1799 xmlParserInputPtr
xmlNewInputFd(xmlParserCtxtPtr ctxt,const char * url,int fd,const char * encoding,int flags)1800 xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
1801 int fd, const char *encoding, int flags) {
1802 xmlParserInputPtr input;
1803
1804 if ((ctxt == NULL) || (fd < 0))
1805 return(NULL);
1806
1807 input = xmlInputCreateFd(url, fd, flags);
1808 if (input == NULL) {
1809 xmlCtxtErrMemory(ctxt);
1810 return(NULL);
1811 }
1812
1813 if (encoding != NULL)
1814 xmlSwitchInputEncodingName(ctxt, input, encoding);
1815
1816 return(input);
1817 }
1818
1819 /**
1820 * xmlInputCreateIO:
1821 * @url: base URL (optional)
1822 * @ioRead: read callback
1823 * @ioClose: close callback (optional)
1824 * @ioCtxt: IO context
1825 * @flags: unused, pass 0
1826 *
1827 * Creates a new parser input to read from input callbacks and
1828 * cintext.
1829 *
1830 * @url is used as base to resolve external entities and for
1831 * error reporting.
1832 *
1833 * @ioRead is called to read new data into a provided buffer.
1834 * It must return the number of bytes written into the buffer
1835 * ot a negative xmlParserErrors code on failure.
1836 *
1837 * @ioClose is called after parsing has finished.
1838 *
1839 * @ioCtxt is an opaque pointer passed to the callbacks.
1840 *
1841 * Available since 2.14.0.
1842 *
1843 * Returns a new parser input or NULL if a memory allocation failed.
1844 */
1845 xmlParserInputPtr
xmlInputCreateIO(const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,int flags ATTRIBUTE_UNUSED)1846 xmlInputCreateIO(const char *url, xmlInputReadCallback ioRead,
1847 xmlInputCloseCallback ioClose, void *ioCtxt,
1848 int flags ATTRIBUTE_UNUSED) {
1849 xmlParserInputBufferPtr buf;
1850
1851 if (ioRead == NULL)
1852 return(NULL);
1853
1854 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1855 if (buf == NULL) {
1856 if (ioClose != NULL)
1857 ioClose(ioCtxt);
1858 return(NULL);
1859 }
1860
1861 buf->context = ioCtxt;
1862 buf->readcallback = ioRead;
1863 buf->closecallback = ioClose;
1864
1865 return(xmlNewInputInternal(buf, url));
1866 }
1867
1868 /**
1869 * xmlNewInputIO:
1870 * @ctxt: parser context
1871 * @url: base URL (optional)
1872 * @ioRead: read callback
1873 * @ioClose: close callback (optional)
1874 * @ioCtxt: IO context
1875 * @encoding: character encoding (optional)
1876 * @flags: unused, pass 0
1877 *
1878 * Returns a new parser input.
1879 */
1880 xmlParserInputPtr
xmlNewInputIO(xmlParserCtxtPtr ctxt,const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,const char * encoding,int flags)1881 xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url,
1882 xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose,
1883 void *ioCtxt, const char *encoding, int flags) {
1884 xmlParserInputPtr input;
1885
1886 if ((ctxt == NULL) || (ioRead == NULL))
1887 return(NULL);
1888
1889 input = xmlInputCreateIO(url, ioRead, ioClose, ioCtxt, flags);
1890 if (input == NULL) {
1891 xmlCtxtErrMemory(ctxt);
1892 return(NULL);
1893 }
1894
1895 if (encoding != NULL)
1896 xmlSwitchInputEncodingName(ctxt, input, encoding);
1897
1898 return(input);
1899 }
1900
1901 /**
1902 * xmlInputCreatePush:
1903 * @url: base URL (optional)
1904 * @chunk: pointer to char array
1905 * @size: size of array
1906 *
1907 * Creates a new parser input for a push parser.
1908 *
1909 * Returns a new parser input or NULL if a memory allocation failed.
1910 */
1911 xmlParserInputPtr
xmlInputCreatePush(const char * url,const char * chunk,int size)1912 xmlInputCreatePush(const char *url, const char *chunk, int size) {
1913 xmlParserInputBufferPtr buf;
1914 xmlParserInputPtr input;
1915
1916 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1917 if (buf == NULL)
1918 return(NULL);
1919
1920 input = xmlNewInputInternal(buf, url);
1921 if (input == NULL)
1922 return(NULL);
1923
1924 input->flags |= XML_INPUT_PROGRESSIVE;
1925
1926 if ((size > 0) && (chunk != NULL)) {
1927 int res;
1928
1929 res = xmlParserInputBufferPush(input->buf, size, chunk);
1930 xmlBufResetInput(input->buf->buffer, input);
1931 if (res < 0) {
1932 xmlFreeInputStream(input);
1933 return(NULL);
1934 }
1935 }
1936
1937 return(input);
1938 }
1939
1940 /**
1941 * xmlNewIOInputStream:
1942 * @ctxt: an XML parser context
1943 * @buf: an input buffer
1944 * @enc: the charset encoding if known
1945 *
1946 * Create a new input stream structure encapsulating the @input into
1947 * a stream suitable for the parser.
1948 *
1949 * Returns the new input stream or NULL
1950 */
1951 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,xmlCharEncoding enc)1952 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1953 xmlCharEncoding enc) {
1954 xmlParserInputPtr input;
1955 const char *encoding;
1956
1957 if (buf == NULL)
1958 return(NULL);
1959
1960 input = xmlNewInputInternal(buf, NULL);
1961 if (input == NULL) {
1962 xmlCtxtErrMemory(ctxt);
1963 return(NULL);
1964 }
1965
1966 encoding = xmlGetCharEncodingName(enc);
1967 if (encoding != NULL)
1968 xmlSwitchInputEncodingName(ctxt, input, encoding);
1969
1970 return(input);
1971 }
1972
1973 /**
1974 * xmlNewEntityInputStream:
1975 * @ctxt: an XML parser context
1976 * @ent: an Entity pointer
1977 *
1978 * DEPRECATED: Internal function, do not use.
1979 *
1980 * Create a new input stream based on an xmlEntityPtr
1981 *
1982 * Returns the new input stream or NULL
1983 */
1984 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)1985 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
1986 xmlParserInputPtr input;
1987
1988 if ((ctxt == NULL) || (ent == NULL))
1989 return(NULL);
1990
1991 if (ent->content != NULL) {
1992 input = xmlNewInputString(ctxt, NULL, (const char *) ent->content,
1993 NULL, XML_INPUT_BUF_STATIC);
1994 } else if (ent->URI != NULL) {
1995 xmlResourceType rtype;
1996
1997 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
1998 rtype = XML_RESOURCE_PARAMETER_ENTITY;
1999 else
2000 rtype = XML_RESOURCE_GENERAL_ENTITY;
2001
2002 input = xmlLoadResource(ctxt, (char *) ent->URI,
2003 (char *) ent->ExternalID, rtype);
2004 } else {
2005 return(NULL);
2006 }
2007
2008 if (input == NULL)
2009 return(NULL);
2010
2011 input->entity = ent;
2012
2013 return(input);
2014 }
2015
2016 /**
2017 * xmlNewStringInputStream:
2018 * @ctxt: an XML parser context
2019 * @buffer: an memory buffer
2020 *
2021 * Create a new input stream based on a memory buffer.
2022 *
2023 * Returns the new input stream
2024 */
2025 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)2026 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2027 return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0));
2028 }
2029
2030
2031 /****************************************************************
2032 * *
2033 * External entities loading *
2034 * *
2035 ****************************************************************/
2036
2037 #ifdef LIBXML_CATALOG_ENABLED
2038
2039 /**
2040 * xmlResolveResourceFromCatalog:
2041 * @URL: the URL for the entity to load
2042 * @ID: the System ID for the entity to load
2043 * @ctxt: the context in which the entity is called or NULL
2044 *
2045 * Resolves the URL and ID against the appropriate catalog.
2046 * This function is used by xmlDefaultExternalEntityLoader and
2047 * xmlNoNetExternalEntityLoader.
2048 *
2049 * Returns a new allocated URL, or NULL.
2050 */
2051 static xmlChar *
xmlResolveResourceFromCatalog(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2052 xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2053 xmlParserCtxtPtr ctxt) {
2054 xmlChar *resource = NULL;
2055 xmlCatalogAllow pref;
2056
2057 /*
2058 * If the resource doesn't exists as a file,
2059 * try to load it from the resource pointed in the catalogs
2060 */
2061 pref = xmlCatalogGetDefaults();
2062
2063 if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2064 /*
2065 * Do a local lookup
2066 */
2067 if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2068 ((pref == XML_CATA_ALLOW_ALL) ||
2069 (pref == XML_CATA_ALLOW_DOCUMENT))) {
2070 resource = xmlCatalogLocalResolve(ctxt->catalogs,
2071 (const xmlChar *)ID,
2072 (const xmlChar *)URL);
2073 }
2074 /*
2075 * Try a global lookup
2076 */
2077 if ((resource == NULL) &&
2078 ((pref == XML_CATA_ALLOW_ALL) ||
2079 (pref == XML_CATA_ALLOW_GLOBAL))) {
2080 resource = xmlCatalogResolve((const xmlChar *)ID,
2081 (const xmlChar *)URL);
2082 }
2083 if ((resource == NULL) && (URL != NULL))
2084 resource = xmlStrdup((const xmlChar *) URL);
2085
2086 /*
2087 * TODO: do an URI lookup on the reference
2088 */
2089 if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2090 xmlChar *tmp = NULL;
2091
2092 if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2093 ((pref == XML_CATA_ALLOW_ALL) ||
2094 (pref == XML_CATA_ALLOW_DOCUMENT))) {
2095 tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2096 }
2097 if ((tmp == NULL) &&
2098 ((pref == XML_CATA_ALLOW_ALL) ||
2099 (pref == XML_CATA_ALLOW_GLOBAL))) {
2100 tmp = xmlCatalogResolveURI(resource);
2101 }
2102
2103 if (tmp != NULL) {
2104 xmlFree(resource);
2105 resource = tmp;
2106 }
2107 }
2108 }
2109
2110 return resource;
2111 }
2112
2113 #endif
2114
2115 /**
2116 * xmlCheckHTTPInput:
2117 * @ctxt: an XML parser context
2118 * @ret: an XML parser input
2119 *
2120 * DEPRECATED: Internal function, don't use.
2121 *
2122 * Check an input in case it was created from an HTTP stream, in that
2123 * case it will handle encoding and update of the base URL in case of
2124 * redirection. It also checks for HTTP errors in which case the input
2125 * is cleanly freed up and an appropriate error is raised in context
2126 *
2127 * Returns the input or NULL in case of HTTP error.
2128 */
2129 xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr ret)2130 xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2131 /* Avoid unused variable warning if features are disabled. */
2132 (void) ctxt;
2133
2134 #ifdef LIBXML_HTTP_ENABLED
2135 if ((ret != NULL) && (ret->buf != NULL) &&
2136 (ret->buf->readcallback == xmlIOHTTPRead) &&
2137 (ret->buf->context != NULL)) {
2138 const char *encoding;
2139 const char *redir;
2140 const char *mime;
2141 int code;
2142
2143 code = xmlNanoHTTPReturnCode(ret->buf->context);
2144 if (code >= 400) {
2145 /* fatal error */
2146 if (ret->filename != NULL)
2147 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2148 else
2149 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2150 xmlFreeInputStream(ret);
2151 ret = NULL;
2152 } else {
2153
2154 mime = xmlNanoHTTPMimeType(ret->buf->context);
2155 if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2156 (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2157 encoding = xmlNanoHTTPEncoding(ret->buf->context);
2158 if (encoding != NULL)
2159 xmlSwitchEncodingName(ctxt, encoding);
2160 #if 0
2161 } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
2162 #endif
2163 }
2164 redir = xmlNanoHTTPRedir(ret->buf->context);
2165 if (redir != NULL) {
2166 if (ret->filename != NULL)
2167 xmlFree((xmlChar *) ret->filename);
2168 ret->filename =
2169 (char *) xmlStrdup((const xmlChar *) redir);
2170 }
2171 }
2172 }
2173 #endif
2174 return(ret);
2175 }
2176
2177 /**
2178 * xmlInputCreateUrl:
2179 * @filename: the filename to use as entity
2180 * @flags: XML_INPUT flags
2181 * @out: pointer to new parser input
2182 *
2183 * Create a new input stream based on a file or a URL.
2184 *
2185 * The flag XML_INPUT_UNZIP allows decompression.
2186 *
2187 * The flag XML_INPUT_NETWORK allows network access.
2188 *
2189 * Available since 2.14.0.
2190 *
2191 * Returns an xmlParserErrors code.
2192 */
2193 int
xmlInputCreateUrl(const char * filename,int flags,xmlParserInputPtr * out)2194 xmlInputCreateUrl(const char *filename, int flags, xmlParserInputPtr *out) {
2195 xmlParserInputBufferPtr buf;
2196 xmlParserInputPtr input;
2197 int code = XML_ERR_OK;
2198
2199 if (out == NULL)
2200 return(XML_ERR_ARGUMENT);
2201 *out = NULL;
2202 if (filename == NULL)
2203 return(XML_ERR_ARGUMENT);
2204
2205 if (xmlParserInputBufferCreateFilenameValue != NULL) {
2206 buf = xmlParserInputBufferCreateFilenameValue(filename,
2207 XML_CHAR_ENCODING_NONE);
2208 if (buf == NULL)
2209 code = XML_IO_ENOENT;
2210 } else {
2211 code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2212 flags, &buf);
2213 }
2214 if (code != XML_ERR_OK)
2215 return(code);
2216
2217 input = xmlNewInputInternal(buf, filename);
2218 if (input == NULL)
2219 return(XML_ERR_NO_MEMORY);
2220
2221 /*input = xmlCheckHTTPInput(ctxt, input);*/
2222
2223 *out = input;
2224 return(XML_ERR_OK);
2225 }
2226
2227 /**
2228 * xmlNewInputFromFile:
2229 * @ctxt: an XML parser context
2230 * @filename: the filename to use as entity
2231 *
2232 * Create a new input stream based on a file or an URL.
2233 *
2234 * Returns the new input stream or NULL in case of error
2235 */
2236 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)2237 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2238 xmlParserInputPtr input;
2239 int flags = 0;
2240 int code;
2241
2242 if ((ctxt == NULL) || (filename == NULL))
2243 return(NULL);
2244
2245 if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2246 flags |= XML_INPUT_UNZIP;
2247 if ((ctxt->options & XML_PARSE_NONET) == 0)
2248 flags |= XML_INPUT_NETWORK;
2249
2250 code = xmlInputCreateUrl(filename, flags, &input);
2251 if (code != XML_ERR_OK) {
2252 xmlCtxtErrIO(ctxt, code, filename);
2253 return(NULL);
2254 }
2255
2256 input = xmlCheckHTTPInput(ctxt, input);
2257
2258 return(input);
2259 }
2260
2261 /**
2262 * xmlDefaultExternalEntityLoader:
2263 * @URL: the URL for the entity to load
2264 * @ID: the System ID for the entity to load
2265 * @ctxt: the context in which the entity is called or NULL
2266 *
2267 * By default we don't load external entities, yet.
2268 *
2269 * Returns a new allocated xmlParserInputPtr, or NULL.
2270 */
2271 static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char * url,const char * ID,xmlParserCtxtPtr ctxt)2272 xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2273 xmlParserCtxtPtr ctxt)
2274 {
2275 xmlParserInputPtr input = NULL;
2276 char *resource = NULL;
2277
2278 (void) ID;
2279
2280 if (url == NULL)
2281 return(NULL);
2282
2283 #ifdef LIBXML_CATALOG_ENABLED
2284 resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2285 if (resource != NULL)
2286 url = resource;
2287 #endif
2288
2289 if ((ctxt != NULL) &&
2290 (ctxt->options & XML_PARSE_NONET) &&
2291 (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2292 xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2293 } else {
2294 input = xmlNewInputFromFile(ctxt, url);
2295 }
2296
2297 if (resource != NULL)
2298 xmlFree(resource);
2299 return(input);
2300 }
2301
2302 /**
2303 * xmlNoNetExternalEntityLoader:
2304 * @URL: the URL for the entity to load
2305 * @ID: the System ID for the entity to load
2306 * @ctxt: the context in which the entity is called or NULL
2307 *
2308 * DEPRECATED: Use XML_PARSE_NONET.
2309 *
2310 * A specific entity loader disabling network accesses, though still
2311 * allowing local catalog accesses for resolution.
2312 *
2313 * Returns a new allocated xmlParserInputPtr, or NULL.
2314 */
2315 xmlParserInputPtr
xmlNoNetExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2316 xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2317 xmlParserCtxtPtr ctxt) {
2318 int oldOptions = 0;
2319 xmlParserInputPtr input;
2320
2321 if (ctxt != NULL) {
2322 oldOptions = ctxt->options;
2323 ctxt->options |= XML_PARSE_NONET;
2324 }
2325
2326 input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2327
2328 if (ctxt != NULL)
2329 ctxt->options = oldOptions;
2330
2331 return(input);
2332 }
2333
/*
 * File-level external entity loader, initially the default loader.
 * It is replaced by xmlSetExternalEntityLoader, returned by
 * xmlGetExternalEntityLoader and invoked by xmlLoadResource when the
 * parser context has no resource loader of its own.
 *
 * This global has to die eventually
 */
static xmlExternalEntityLoader
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2339
2340 /**
2341 * xmlSetExternalEntityLoader:
2342 * @f: the new entity resolver function
2343 *
2344 * DEPRECATED: This is a global setting and not thread-safe. Use
2345 * xmlCtxtSetResourceLoader or similar functions.
2346 *
2347 * Changes the default external entity resolver function for the
2348 * application.
2349 */
2350 void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f)2351 xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2352 xmlCurrentExternalEntityLoader = f;
2353 }
2354
2355 /**
2356 * xmlGetExternalEntityLoader:
2357 *
2358 * DEPRECATED: See xmlSetExternalEntityLoader.
2359 *
2360 * Get the default external entity resolver function for the application
2361 *
2362 * Returns the xmlExternalEntityLoader function pointer
2363 */
2364 xmlExternalEntityLoader
xmlGetExternalEntityLoader(void)2365 xmlGetExternalEntityLoader(void) {
2366 return(xmlCurrentExternalEntityLoader);
2367 }
2368
2369 /**
2370 * xmlCtxtSetResourceLoader:
2371 * @ctxt: parser context
2372 * @loader: callback
2373 * @vctxt: user data
2374 *
2375 * Installs a custom callback to load documents, DTDs or external
2376 * entities.
2377 *
2378 * Available since 2.14.0.
2379 */
2380 void
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt,xmlResourceLoader loader,void * vctxt)2381 xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2382 void *vctxt) {
2383 if (ctxt == NULL)
2384 return;
2385
2386 ctxt->resourceLoader = loader;
2387 ctxt->resourceCtxt = vctxt;
2388 }
2389
2390 /**
2391 * xmlLoadResource:
2392 * @ctxt: parser context
2393 * @url: the URL for the entity to load
2394 * @publicId: the Public ID for the entity to load
2395 * @type: resource type
2396 *
2397 * Returns the xmlParserInputPtr or NULL in case of error.
2398 */
2399 xmlParserInputPtr
xmlLoadResource(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,xmlResourceType type)2400 xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2401 xmlResourceType type) {
2402 char *canonicFilename;
2403 xmlParserInputPtr ret;
2404
2405 if (url == NULL)
2406 return(NULL);
2407
2408 if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2409 int flags = 0;
2410 int code;
2411
2412 if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2413 flags |= XML_INPUT_UNZIP;
2414 if ((ctxt->options & XML_PARSE_NONET) == 0)
2415 flags |= XML_INPUT_NETWORK;
2416
2417 code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, flags,
2418 type, &ret);
2419 if (code != XML_ERR_OK) {
2420 xmlCtxtErrIO(ctxt, code, url);
2421 return(NULL);
2422 }
2423 return(ret);
2424 }
2425
2426 canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2427 if (canonicFilename == NULL) {
2428 xmlCtxtErrMemory(ctxt);
2429 return(NULL);
2430 }
2431
2432 ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2433 xmlFree(canonicFilename);
2434 return(ret);
2435 }
2436
2437 /**
2438 * xmlLoadExternalEntity:
2439 * @URL: the URL for the entity to load
2440 * @ID: the Public ID for the entity to load
2441 * @ctxt: the context in which the entity is called or NULL
2442 *
2443 * @URL is a filename or URL. If if contains the substring "://",
2444 * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2445 * treated as a filesystem path.
2446 *
2447 * @ID is an optional XML public ID, typically from a doctype
2448 * declaration. It is used for catalog lookups.
2449 *
2450 * The following resource loaders will be called if they were
2451 * registered (in order of precedence):
2452 *
2453 * - the resource loader set with xmlCtxtSetResourceLoader
2454 * - the global external entity loader set with
2455 * xmlSetExternalEntityLoader
2456 * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2457 * xmlParserInputBufferCreateFilenameDefault
2458 * - the default loader which will return
2459 * - the result from a matching global input callback set with
2460 * xmlRegisterInputCallbacks
2461 * - a HTTP resource if support is compiled in.
2462 * - a file opened from the filesystem, with automatic detection
2463 * of compressed files if support is compiled in.
2464 *
2465 * Returns the xmlParserInputPtr or NULL
2466 */
2467 xmlParserInputPtr
xmlLoadExternalEntity(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2468 xmlLoadExternalEntity(const char *URL, const char *ID,
2469 xmlParserCtxtPtr ctxt) {
2470 return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2471 }
2472
2473 /************************************************************************
2474 * *
2475 * Commodity functions to handle parser contexts *
2476 * *
2477 ************************************************************************/
2478
/**
 * xmlInitSAXParserCtxt:
 * @ctxt:  XML parser context
 * @sax:  SAX handler, or NULL to use the default SAX2 handler
 * @userData:  user data
 *
 * Initialize a SAX parser context. The dictionary, the SAX handler
 * struct, the input, node, name and space stacks and the namespace
 * database are allocated if the context doesn't have them yet. Any
 * inputs still on the input stack are popped and freed. Parser state
 * is reset and the initial options are derived from the deprecated
 * global default variables.
 *
 * If @sax is NULL, the context itself is used as user data. Otherwise
 * @userData is used, falling back to the context if it is NULL.
 *
 * On failure, members that were already allocated stay attached to
 * @ctxt; nothing is released here.
 *
 * Returns 0 in case of success and -1 in case of error
 */

static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
                     void *userData)
{
    xmlParserInputPtr input;

    if (ctxt == NULL)
        return(-1);

    /* Reuse an existing dictionary, otherwise create one */
    if (ctxt->dict == NULL)
        ctxt->dict = xmlDictCreate();
    if (ctxt->dict == NULL)
        return(-1);
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);

    /* The context always works on its own copy of the SAX handler */
    if (ctxt->sax == NULL)
        ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
    if (ctxt->sax == NULL)
        return(-1);
    if (sax == NULL) {
        memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
        xmlSAXVersion(ctxt->sax, 2);
        ctxt->userData = ctxt;
    } else {
        if (sax->initialized == XML_SAX2_MAGIC) {
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
        } else {
            /*
             * Handler without the SAX2 magic: only the part covered by
             * xmlSAXHandlerV1 is copied, the remainder stays zeroed.
             */
            memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
        }
        ctxt->userData = userData ? userData : ctxt;
    }

    /*
     * NOTE(review): atts is reset without freeing a previous array;
     * presumably callers only pass fresh or already reset contexts.
     */
    ctxt->maxatts = 0;
    ctxt->atts = NULL;
    /* Allocate the Input stack */
    if (ctxt->inputTab == NULL) {
        ctxt->inputTab = (xmlParserInputPtr *)
                    xmlMalloc(5 * sizeof(xmlParserInputPtr));
        ctxt->inputMax = 5;
    }
    if (ctxt->inputTab == NULL)
        return(-1);
    /* Free any inputs left over on a reused context */
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
        xmlFreeInputStream(input);
    }
    ctxt->inputNr = 0;
    ctxt->input = NULL;

    ctxt->version = NULL;
    ctxt->encoding = NULL;
    ctxt->standalone = -1;
    ctxt->hasExternalSubset = 0;
    ctxt->hasPErefs = 0;
    ctxt->html = 0;
    ctxt->instate = XML_PARSER_START;

    /* Allocate the Node stack */
    if (ctxt->nodeTab == NULL) {
        ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
        ctxt->nodeMax = 10;
    }
    if (ctxt->nodeTab == NULL)
        return(-1);
    ctxt->nodeNr = 0;
    ctxt->node = NULL;

    /* Allocate the Name stack */
    if (ctxt->nameTab == NULL) {
        ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
        ctxt->nameMax = 10;
    }
    if (ctxt->nameTab == NULL)
        return(-1);
    ctxt->nameNr = 0;
    ctxt->name = NULL;

    /* Allocate the space stack */
    if (ctxt->spaceTab == NULL) {
        ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
        ctxt->spaceMax = 10;
    }
    if (ctxt->spaceTab == NULL)
        return(-1);
    /* The space stack starts with a single entry holding -1 */
    ctxt->spaceNr = 1;
    /*
     * NOTE(review): spaceMax is set to 10 even if spaceTab was already
     * allocated; presumably an existing table is never smaller - confirm.
     */
    ctxt->spaceMax = 10;
    ctxt->spaceTab[0] = -1;
    ctxt->space = &ctxt->spaceTab[0];
    ctxt->myDoc = NULL;
    ctxt->wellFormed = 1;
    ctxt->nsWellFormed = 1;
    ctxt->valid = 1;

    ctxt->options = XML_PARSE_NODICT;

    /*
     * Initialize some parser options from deprecated global variables.
     * Note that the "modern" API taking options arguments or
     * xmlCtxtSetOptions will ignore these defaults. They're only
     * relevant if old API functions like xmlParseFile are used.
     */
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
    if (ctxt->loadsubset) {
        ctxt->options |= XML_PARSE_DTDLOAD;
    }
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
    if (ctxt->validate) {
        ctxt->options |= XML_PARSE_DTDVALID;
    }
    ctxt->pedantic = xmlPedanticParserDefaultValue;
    if (ctxt->pedantic) {
        ctxt->options |= XML_PARSE_PEDANTIC;
    }
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
    if (ctxt->keepBlanks == 0) {
        /* Dropping blanks also swaps in the SAX2 ignorable whitespace handler */
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
        ctxt->options |= XML_PARSE_NOBLANKS;
    }
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
    if (ctxt->replaceEntities) {
        ctxt->options |= XML_PARSE_NOENT;
    }
    if (xmlGetWarningsDefaultValue == 0)
        ctxt->options |= XML_PARSE_NOWARNING;

    /* The validation context reports through this parser context */
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
    ctxt->vctxt.userData = ctxt;
    ctxt->vctxt.error = xmlParserValidityError;
    ctxt->vctxt.warning = xmlParserValidityWarning;

    ctxt->record_info = 0;
    ctxt->checkIndex = 0;
    ctxt->inSubset = 0;
    ctxt->errNo = XML_ERR_OK;
    ctxt->depth = 0;
    ctxt->catalogs = NULL;
    ctxt->sizeentities = 0;
    ctxt->sizeentcopy = 0;
    ctxt->input_id = 1;
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
    xmlInitNodeInfoSeq(&ctxt->node_seq);

    /* Namespace database, created once and kept across re-initialization */
    if (ctxt->nsdb == NULL) {
        ctxt->nsdb = xmlParserNsCreate();
        if (ctxt->nsdb == NULL)
            return(-1);
    }

    return(0);
}
2641
2642 /**
2643 * xmlInitParserCtxt:
2644 * @ctxt: an XML parser context
2645 *
2646 * DEPRECATED: Internal function which will be made private in a future
2647 * version.
2648 *
2649 * Initialize a parser context
2650 *
2651 * Returns 0 in case of success and -1 in case of error
2652 */
2653
2654 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)2655 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2656 {
2657 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2658 }
2659
2660 /**
2661 * xmlFreeParserCtxt:
2662 * @ctxt: an XML parser context
2663 *
2664 * Free all the memory used by a parser context. However the parsed
2665 * document in ctxt->myDoc is not freed.
2666 */
2667
2668 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)2669 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2670 {
2671 xmlParserInputPtr input;
2672
2673 if (ctxt == NULL) return;
2674
2675 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2676 xmlFreeInputStream(input);
2677 }
2678 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2679 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2680 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2681 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2682 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2683 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2684 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2685 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2686 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2687 #ifdef LIBXML_SAX1_ENABLED
2688 if ((ctxt->sax != NULL) &&
2689 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2690 #else
2691 if (ctxt->sax != NULL)
2692 #endif /* LIBXML_SAX1_ENABLED */
2693 xmlFree(ctxt->sax);
2694 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2695 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2696 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2697 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2698 if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2699 if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2700 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2701 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2702 if (ctxt->attsDefault != NULL)
2703 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2704 if (ctxt->attsSpecial != NULL)
2705 xmlHashFree(ctxt->attsSpecial, NULL);
2706 if (ctxt->freeElems != NULL) {
2707 xmlNodePtr cur, next;
2708
2709 cur = ctxt->freeElems;
2710 while (cur != NULL) {
2711 next = cur->next;
2712 xmlFree(cur);
2713 cur = next;
2714 }
2715 }
2716 if (ctxt->freeAttrs != NULL) {
2717 xmlAttrPtr cur, next;
2718
2719 cur = ctxt->freeAttrs;
2720 while (cur != NULL) {
2721 next = cur->next;
2722 xmlFree(cur);
2723 cur = next;
2724 }
2725 }
2726 /*
2727 * cleanup the error strings
2728 */
2729 if (ctxt->lastError.message != NULL)
2730 xmlFree(ctxt->lastError.message);
2731 if (ctxt->lastError.file != NULL)
2732 xmlFree(ctxt->lastError.file);
2733 if (ctxt->lastError.str1 != NULL)
2734 xmlFree(ctxt->lastError.str1);
2735 if (ctxt->lastError.str2 != NULL)
2736 xmlFree(ctxt->lastError.str2);
2737 if (ctxt->lastError.str3 != NULL)
2738 xmlFree(ctxt->lastError.str3);
2739
2740 #ifdef LIBXML_CATALOG_ENABLED
2741 if (ctxt->catalogs != NULL)
2742 xmlCatalogFreeLocal(ctxt->catalogs);
2743 #endif
2744 xmlFree(ctxt);
2745 }
2746
2747 /**
2748 * xmlNewParserCtxt:
2749 *
2750 * Allocate and initialize a new parser context.
2751 *
2752 * Returns the xmlParserCtxtPtr or NULL
2753 */
2754
2755 xmlParserCtxtPtr
xmlNewParserCtxt(void)2756 xmlNewParserCtxt(void)
2757 {
2758 return(xmlNewSAXParserCtxt(NULL, NULL));
2759 }
2760
2761 /**
2762 * xmlNewSAXParserCtxt:
2763 * @sax: SAX handler
2764 * @userData: user data
2765 *
2766 * Allocate and initialize a new SAX parser context. If userData is NULL,
2767 * the parser context will be passed as user data.
2768 *
2769 * Available since 2.11.0. If you want support older versions,
2770 * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2771 * struct assignment.
2772 *
2773 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2774 */
2775
2776 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)2777 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2778 {
2779 xmlParserCtxtPtr ctxt;
2780
2781 xmlInitParser();
2782
2783 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2784 if (ctxt == NULL)
2785 return(NULL);
2786 memset(ctxt, 0, sizeof(xmlParserCtxt));
2787 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2788 xmlFreeParserCtxt(ctxt);
2789 return(NULL);
2790 }
2791 return(ctxt);
2792 }
2793
2794 /************************************************************************
2795 * *
2796 * Handling of node information *
2797 * *
2798 ************************************************************************/
2799
2800 /**
2801 * xmlClearParserCtxt:
2802 * @ctxt: an XML parser context
2803 *
2804 * Clear (release owned resources) and reinitialize a parser context
2805 */
2806
2807 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)2808 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2809 {
2810 if (ctxt==NULL)
2811 return;
2812 xmlClearNodeInfoSeq(&ctxt->node_seq);
2813 xmlCtxtReset(ctxt);
2814 }
2815
2816
2817 /**
2818 * xmlParserFindNodeInfo:
2819 * @ctx: an XML parser context
2820 * @node: an XML node within the tree
2821 *
2822 * DEPRECATED: Don't use.
2823 *
2824 * Find the parser node info struct for a given node
2825 *
2826 * Returns an xmlParserNodeInfo block pointer or NULL
2827 */
2828 const xmlParserNodeInfo *
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx,xmlNodePtr node)2829 xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
2830 {
2831 unsigned long pos;
2832
2833 if ((ctx == NULL) || (node == NULL))
2834 return (NULL);
2835 /* Find position where node should be at */
2836 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2837 if (pos < ctx->node_seq.length
2838 && ctx->node_seq.buffer[pos].node == node)
2839 return &ctx->node_seq.buffer[pos];
2840 else
2841 return NULL;
2842 }
2843
2844
2845 /**
2846 * xmlInitNodeInfoSeq:
2847 * @seq: a node info sequence pointer
2848 *
2849 * DEPRECATED: Don't use.
2850 *
2851 * -- Initialize (set to initial state) node info sequence
2852 */
2853 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2854 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2855 {
2856 if (seq == NULL)
2857 return;
2858 seq->length = 0;
2859 seq->maximum = 0;
2860 seq->buffer = NULL;
2861 }
2862
2863 /**
2864 * xmlClearNodeInfoSeq:
2865 * @seq: a node info sequence pointer
2866 *
2867 * DEPRECATED: Don't use.
2868 *
2869 * -- Clear (release memory and reinitialize) node
2870 * info sequence
2871 */
2872 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2873 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2874 {
2875 if (seq == NULL)
2876 return;
2877 if (seq->buffer != NULL)
2878 xmlFree(seq->buffer);
2879 xmlInitNodeInfoSeq(seq);
2880 }
2881
2882 /**
2883 * xmlParserFindNodeInfoIndex:
2884 * @seq: a node info sequence pointer
2885 * @node: an XML node pointer
2886 *
2887 * DEPRECATED: Don't use.
2888 *
2889 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2890 * the given node is or should be at in a sorted sequence
2891 *
2892 * Returns a long indicating the position of the record
2893 */
2894 unsigned long
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,xmlNodePtr node)2895 xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
2896 xmlNodePtr node)
2897 {
2898 unsigned long upper, lower, middle;
2899 int found = 0;
2900
2901 if ((seq == NULL) || (node == NULL))
2902 return ((unsigned long) -1);
2903
2904 /* Do a binary search for the key */
2905 lower = 1;
2906 upper = seq->length;
2907 middle = 0;
2908 while (lower <= upper && !found) {
2909 middle = lower + (upper - lower) / 2;
2910 if (node == seq->buffer[middle - 1].node)
2911 found = 1;
2912 else if (node < seq->buffer[middle - 1].node)
2913 upper = middle - 1;
2914 else
2915 lower = middle + 1;
2916 }
2917
2918 /* Return position */
2919 if (middle == 0 || seq->buffer[middle - 1].node < node)
2920 return middle;
2921 else
2922 return middle - 1;
2923 }
2924
2925
2926 /**
2927 * xmlParserAddNodeInfo:
2928 * @ctxt: an XML parser context
2929 * @info: a node info sequence pointer
2930 *
2931 * DEPRECATED: Don't use.
2932 *
2933 * Insert node info record into the sorted sequence
2934 */
2935 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,xmlParserNodeInfoPtr info)2936 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2937 xmlParserNodeInfoPtr info)
2938 {
2939 unsigned long pos;
2940
2941 if ((ctxt == NULL) || (info == NULL)) return;
2942
2943 /* Find pos and check to see if node is already in the sequence */
2944 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2945 info->node);
2946
2947 if ((pos < ctxt->node_seq.length) &&
2948 (ctxt->node_seq.buffer != NULL) &&
2949 (ctxt->node_seq.buffer[pos].node == info->node)) {
2950 ctxt->node_seq.buffer[pos] = *info;
2951 }
2952
2953 /* Otherwise, we need to add new node to buffer */
2954 else {
2955 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2956 (ctxt->node_seq.buffer == NULL)) {
2957 xmlParserNodeInfo *tmp_buffer;
2958 unsigned int byte_size;
2959
2960 if (ctxt->node_seq.maximum == 0)
2961 ctxt->node_seq.maximum = 2;
2962 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2963 (2 * ctxt->node_seq.maximum));
2964
2965 if (ctxt->node_seq.buffer == NULL)
2966 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2967 else
2968 tmp_buffer =
2969 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2970 byte_size);
2971
2972 if (tmp_buffer == NULL) {
2973 xmlCtxtErrMemory(ctxt);
2974 return;
2975 }
2976 ctxt->node_seq.buffer = tmp_buffer;
2977 ctxt->node_seq.maximum *= 2;
2978 }
2979
2980 /* If position is not at end, move elements out of the way */
2981 if (pos != ctxt->node_seq.length) {
2982 unsigned long i;
2983
2984 for (i = ctxt->node_seq.length; i > pos; i--)
2985 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2986 }
2987
2988 /* Copy element and increase length */
2989 ctxt->node_seq.buffer[pos] = *info;
2990 ctxt->node_seq.length++;
2991 }
2992 }
2993
2994 /************************************************************************
2995 * *
2996 * Defaults settings *
2997 * *
2998 ************************************************************************/
2999 /**
3000 * xmlPedanticParserDefault:
3001 * @val: int 0 or 1
3002 *
3003 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3004 *
3005 * Set and return the previous value for enabling pedantic warnings.
3006 *
3007 * Returns the last value for 0 for no substitution, 1 for substitution.
3008 */
3009
3010 int
xmlPedanticParserDefault(int val)3011 xmlPedanticParserDefault(int val) {
3012 int old = xmlPedanticParserDefaultValue;
3013
3014 xmlPedanticParserDefaultValue = val;
3015 return(old);
3016 }
3017
3018 /**
3019 * xmlLineNumbersDefault:
3020 * @val: int 0 or 1
3021 *
3022 * DEPRECATED: The modern options API always enables line numbers.
3023 *
3024 * Set and return the previous value for enabling line numbers in elements
3025 * contents. This may break on old application and is turned off by default.
3026 *
3027 * Returns the last value for 0 for no substitution, 1 for substitution.
3028 */
3029
3030 int
xmlLineNumbersDefault(int val)3031 xmlLineNumbersDefault(int val) {
3032 int old = xmlLineNumbersDefaultValue;
3033
3034 xmlLineNumbersDefaultValue = val;
3035 return(old);
3036 }
3037
3038 /**
3039 * xmlSubstituteEntitiesDefault:
3040 * @val: int 0 or 1
3041 *
3042 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3043 *
3044 * Set and return the previous value for default entity support.
3045 * Initially the parser always keep entity references instead of substituting
3046 * entity values in the output. This function has to be used to change the
3047 * default parser behavior
3048 * SAX::substituteEntities() has to be used for changing that on a file by
3049 * file basis.
3050 *
3051 * Returns the last value for 0 for no substitution, 1 for substitution.
3052 */
3053
3054 int
xmlSubstituteEntitiesDefault(int val)3055 xmlSubstituteEntitiesDefault(int val) {
3056 int old = xmlSubstituteEntitiesDefaultValue;
3057
3058 xmlSubstituteEntitiesDefaultValue = val;
3059 return(old);
3060 }
3061
3062 /**
3063 * xmlKeepBlanksDefault:
3064 * @val: int 0 or 1
3065 *
3066 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3067 *
3068 * Set and return the previous value for default blanks text nodes support.
3069 * The 1.x version of the parser used an heuristic to try to detect
3070 * ignorable white spaces. As a result the SAX callback was generating
3071 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3072 * using the DOM output text nodes containing those blanks were not generated.
3073 * The 2.x and later version will switch to the XML standard way and
3074 * ignorableWhitespace() are only generated when running the parser in
3075 * validating mode and when the current element doesn't allow CDATA or
3076 * mixed content.
3077 * This function is provided as a way to force the standard behavior
3078 * on 1.X libs and to switch back to the old mode for compatibility when
3079 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3080 * by using xmlIsBlankNode() commodity function to detect the "empty"
3081 * nodes generated.
3082 * This value also affect autogeneration of indentation when saving code
3083 * if blanks sections are kept, indentation is not generated.
3084 *
3085 * Returns the last value for 0 for no substitution, 1 for substitution.
3086 */
3087
3088 int
xmlKeepBlanksDefault(int val)3089 xmlKeepBlanksDefault(int val) {
3090 int old = xmlKeepBlanksDefaultValue;
3091
3092 xmlKeepBlanksDefaultValue = val;
3093 #ifdef LIBXML_OUTPUT_ENABLED
3094 if (!val)
3095 xmlIndentTreeOutput = 1;
3096 #endif
3097 return(old);
3098 }
3099
3100