• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parserInternals.c : Internal routines (and obsolete ones) needed for the
3  *                     XML and HTML parsers.
4  *
5  * See Copyright for the status of this software.
6  *
7  * daniel@veillard.com
8  */
9 
10 #define IN_LIBXML
11 #include "libxml.h"
12 
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18 
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
22 
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 #include <libxml/nanohttp.h>
39 
40 #define CUR(ctxt) ctxt->input->cur
41 #define END(ctxt) ctxt->input->end
42 
43 #include "private/buf.h"
44 #include "private/enc.h"
45 #include "private/error.h"
46 #include "private/io.h"
47 #include "private/parser.h"
48 
49 #define XML_MAX_ERRORS 100
50 
51 /*
52  * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
53  * factor of serialized output after entity expansion.
54  */
55 #define XML_MAX_AMPLIFICATION_DEFAULT 5
56 
57 /*
58  * Various global defaults for parsing
59  */
60 
61 /**
62  * xmlCheckVersion:
63  * @version: the include version number
64  *
65  * check the compiled lib version against the include one.
66  */
67 void
xmlCheckVersion(int version)68 xmlCheckVersion(int version) {
69     int myversion = LIBXML_VERSION;
70 
71     xmlInitParser();
72 
73     if ((myversion / 10000) != (version / 10000)) {
74 	fprintf(stderr,
75 		"Fatal: program compiled against libxml %d using libxml %d\n",
76 		(version / 10000), (myversion / 10000));
77     } else if ((myversion / 100) < (version / 100)) {
78 	fprintf(stderr,
79 		"Warning: program compiled against libxml %d using older %d\n",
80 		(version / 100), (myversion / 100));
81     }
82 }
83 
84 
85 /************************************************************************
86  *									*
87  *		Some factorized error routines				*
88  *									*
89  ************************************************************************/
90 
91 
92 /**
93  * xmlCtxtSetErrorHandler:
94  * @ctxt:  an XML parser context
95  * @handler:  error handler
96  * @data:  data for error handler
97  *
98  * Register a callback function that will be called on errors and
99  * warnings. If handler is NULL, the error handler will be deactivated.
100  *
101  * This is the recommended way to collect errors from the parser and
102  * takes precedence over all other error reporting mechanisms.
103  * These are (in order of precedence):
104  *
105  * - per-context structured handler (xmlCtxtSetErrorHandler)
106  * - per-context structured "serror" SAX handler
107  * - global structured handler (xmlSetStructuredErrorFunc)
108  * - per-context generic "error" and "warning" SAX handlers
109  * - global generic handler (xmlSetGenericErrorFunc)
110  * - print to stderr
111  *
112  * Available since 2.13.0.
113  */
114 void
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt,xmlStructuredErrorFunc handler,void * data)115 xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
116                        void *data)
117 {
118     if (ctxt == NULL)
119         return;
120     ctxt->errorHandler = handler;
121     ctxt->errorCtxt = data;
122 }
123 
124 /**
125  * xmlCtxtErrMemory:
126  * @ctxt:  an XML parser context
127  *
128  * Handle an out-of-memory error.
129  *
130  * Available since 2.13.0.
131  */
132 void
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)133 xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
134 {
135     xmlStructuredErrorFunc schannel = NULL;
136     xmlGenericErrorFunc channel = NULL;
137     void *data;
138 
139     if (ctxt == NULL)
140         return;
141 
142     ctxt->errNo = XML_ERR_NO_MEMORY;
143     ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
144     ctxt->wellFormed = 0;
145     ctxt->disableSAX = 2;
146 
147     if (ctxt->errorHandler) {
148         schannel = ctxt->errorHandler;
149         data = ctxt->errorCtxt;
150     } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
151         (ctxt->sax->serror != NULL)) {
152         schannel = ctxt->sax->serror;
153         data = ctxt->userData;
154     } else {
155         channel = ctxt->sax->error;
156         data = ctxt->userData;
157     }
158 
159     xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
160                         &ctxt->lastError);
161 }
162 
163 /**
164  * xmlCtxtErrIO:
165  * @ctxt:  parser context
166  * @code:  xmlParserErrors code
167  * @uri:  filename or URI (optional)
168  *
169  * If filename is empty, use the one from context input if available.
170  *
171  * Report an IO error to the parser context.
172  */
173 void
xmlCtxtErrIO(xmlParserCtxtPtr ctxt,int code,const char * uri)174 xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
175 {
176     const char *errstr, *msg, *str1, *str2;
177     xmlErrorLevel level;
178 
179     if (ctxt == NULL)
180         return;
181 
182     /*
183      * Don't report a well-formedness error if an external entity could
184      * not be found. We assume that inputNr is zero for the document
185      * entity which is somewhat fragile.
186      */
187     if ((ctxt->inputNr > 0) &&
188         ((code == XML_IO_ENOENT) ||
189          (code == XML_IO_NETWORK_ATTEMPT) ||
190          (code == XML_IO_UNKNOWN))) {
191         if (ctxt->validate == 0)
192             level = XML_ERR_WARNING;
193         else
194             level = XML_ERR_ERROR;
195     } else {
196         level = XML_ERR_FATAL;
197     }
198 
199     errstr = xmlErrString(code);
200 
201     if (uri == NULL) {
202         msg = "%s\n";
203         str1 = errstr;
204         str2 = NULL;
205     } else {
206         msg = "failed to load \"%s\": %s\n";
207         str1 = uri;
208         str2 = errstr;
209     }
210 
211     xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
212                (const xmlChar *) uri, NULL, NULL, 0,
213                msg, str1, str2);
214 }
215 
216 /**
217  * xmlCtxtVErr:
218  * @ctxt:  a parser context
219  * @node: the current node or NULL
220  * @domain: the domain for the error
221  * @code: the code for the error
222  * @level: the xmlErrorLevel for the error
223  * @str1: extra string info
224  * @str2: extra string info
225  * @str3: extra string info
226  * @int1: extra int info
227  * @msg:  the message to display/transmit
228  * @ap:  extra parameters for the message display
229  *
230  * Raise a parser error.
231  */
232 void
xmlCtxtVErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,va_list ap)233 xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
234             xmlParserErrors code, xmlErrorLevel level,
235             const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
236             int int1, const char *msg, va_list ap)
237 {
238     xmlStructuredErrorFunc schannel = NULL;
239     xmlGenericErrorFunc channel = NULL;
240     void *data = NULL;
241     const char *file = NULL;
242     int line = 0;
243     int col = 0;
244     int res;
245 
246     if (code == XML_ERR_NO_MEMORY) {
247         xmlCtxtErrMemory(ctxt);
248         return;
249     }
250 
251     if (PARSER_STOPPED(ctxt))
252 	return;
253 
254     if (level == XML_ERR_WARNING) {
255         if (ctxt->nbWarnings >= XML_MAX_ERRORS)
256             return;
257         ctxt->nbWarnings += 1;
258     } else {
259         if (ctxt->nbErrors >= XML_MAX_ERRORS)
260             return;
261         ctxt->nbErrors += 1;
262     }
263 
264     if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
265         ((level != XML_ERR_WARNING) ||
266          ((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
267         if (ctxt->errorHandler) {
268             schannel = ctxt->errorHandler;
269             data = ctxt->errorCtxt;
270         } else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
271             (ctxt->sax->serror != NULL)) {
272             schannel = ctxt->sax->serror;
273             data = ctxt->userData;
274         } else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
275             if (level == XML_ERR_WARNING)
276                 channel = ctxt->vctxt.warning;
277             else
278                 channel = ctxt->vctxt.error;
279             data = ctxt->vctxt.userData;
280         } else {
281             if (level == XML_ERR_WARNING)
282                 channel = ctxt->sax->warning;
283             else
284                 channel = ctxt->sax->error;
285             data = ctxt->userData;
286         }
287     }
288 
289     if (ctxt->input != NULL) {
290         xmlParserInputPtr input = ctxt->input;
291 
292         if ((input->filename == NULL) &&
293             (ctxt->inputNr > 1)) {
294             input = ctxt->inputTab[ctxt->inputNr - 2];
295         }
296         file = input->filename;
297         line = input->line;
298         col = input->col;
299     }
300 
301     res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
302                          level, file, line, (const char *) str1,
303                          (const char *) str2, (const char *) str3, int1, col,
304                          msg, ap);
305 
306     if (res < 0) {
307         xmlCtxtErrMemory(ctxt);
308         return;
309     }
310 
311     if (level >= XML_ERR_ERROR)
312         ctxt->errNo = code;
313     if (level == XML_ERR_FATAL) {
314         ctxt->wellFormed = 0;
315         if (ctxt->recovery == 0)
316             ctxt->disableSAX = 1;
317     }
318 
319     return;
320 }
321 
322 /**
323  * xmlCtxtErr:
324  * @ctxt:  a parser context
325  * @node: the current node or NULL
326  * @domain: the domain for the error
327  * @code: the code for the error
328  * @level: the xmlErrorLevel for the error
329  * @str1: extra string info
330  * @str2: extra string info
331  * @str3: extra string info
332  * @int1: extra int info
333  * @msg:  the message to display/transmit
334  * @...:  extra parameters for the message display
335  *
336  * Raise a parser error.
337  */
338 void
xmlCtxtErr(xmlParserCtxtPtr ctxt,xmlNodePtr node,xmlErrorDomain domain,xmlParserErrors code,xmlErrorLevel level,const xmlChar * str1,const xmlChar * str2,const xmlChar * str3,int int1,const char * msg,...)339 xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
340            xmlParserErrors code, xmlErrorLevel level,
341            const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
342            int int1, const char *msg, ...)
343 {
344     va_list ap;
345 
346     va_start(ap, msg);
347     xmlCtxtVErr(ctxt, node, domain, code, level,
348                 str1, str2, str3, int1, msg, ap);
349     va_end(ap);
350 }
351 
352 /**
353  * xmlFatalErr:
354  * @ctxt:  an XML parser context
355  * @code:  the error number
356  * @info:  extra information string
357  *
358  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
359  */
360 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors code,const char * info)361 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
362 {
363     const char *errmsg;
364     xmlErrorLevel level;
365 
366     if (code == XML_ERR_UNSUPPORTED_ENCODING)
367         level = XML_ERR_WARNING;
368     else
369         level = XML_ERR_FATAL;
370 
371     errmsg = xmlErrString(code);
372 
373     if (info == NULL) {
374         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
375                    NULL, NULL, NULL, 0, "%s\n", errmsg);
376     } else {
377         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, level,
378                    (const xmlChar *) info, NULL, NULL, 0,
379                    "%s: %s\n", errmsg, info);
380     }
381 }
382 
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
396 
397 /************************************************************************
398  *									*
399  *		Input handling functions for progressive parsing	*
400  *									*
401  ************************************************************************/
402 
403 /* we need to keep enough input to show errors in context */
404 #define LINE_LEN        80
405 
406 /**
407  * xmlHaltParser:
408  * @ctxt:  an XML parser context
409  *
410  * Blocks further parser processing don't override error
411  * for internal use
412  */
413 void
xmlHaltParser(xmlParserCtxtPtr ctxt)414 xmlHaltParser(xmlParserCtxtPtr ctxt) {
415     if (ctxt == NULL)
416         return;
417     ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
418     ctxt->disableSAX = 2;
419 }
420 
421 /**
422  * xmlParserInputRead:
423  * @in:  an XML parser input
424  * @len:  an indicative size for the lookahead
425  *
426  * DEPRECATED: This function was internal and is deprecated.
427  *
428  * Returns -1 as this is an error to use it.
429  */
430 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)431 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
432     return(-1);
433 }
434 
435 /**
436  * xmlParserGrow:
437  * @ctxt:  an XML parser context
438  *
439  * Grow the input buffer.
440  *
441  * Returns the number of bytes read or -1 in case of error.
442  */
443 int
xmlParserGrow(xmlParserCtxtPtr ctxt)444 xmlParserGrow(xmlParserCtxtPtr ctxt) {
445     xmlParserInputPtr in = ctxt->input;
446     xmlParserInputBufferPtr buf = in->buf;
447     size_t curEnd = in->end - in->cur;
448     size_t curBase = in->cur - in->base;
449     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
450                        XML_MAX_HUGE_LENGTH :
451                        XML_MAX_LOOKUP_LIMIT;
452     int ret;
453 
454     if (buf == NULL)
455         return(0);
456     /* Don't grow push parser buffer. */
457     if (PARSER_PROGRESSIVE(ctxt))
458         return(0);
459     /* Don't grow memory buffers. */
460     if ((buf->encoder == NULL) && (buf->readcallback == NULL))
461         return(0);
462     if (buf->error != 0)
463         return(-1);
464 
465     if (curBase > maxLength) {
466         xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
467                     "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
468         xmlHaltParser(ctxt);
469 	return(-1);
470     }
471 
472     if (curEnd >= INPUT_CHUNK)
473         return(0);
474 
475     ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
476     xmlBufUpdateInput(buf->buffer, in, curBase);
477 
478     if (ret < 0) {
479         xmlCtxtErrIO(ctxt, buf->error, NULL);
480     }
481 
482     return(ret);
483 }
484 
485 /**
486  * xmlParserInputGrow:
487  * @in:  an XML parser input
488  * @len:  an indicative size for the lookahead
489  *
490  * DEPRECATED: Don't use.
491  *
492  * This function increase the input for the parser. It tries to
493  * preserve pointers to the input buffer, and keep already read data
494  *
495  * Returns the amount of char read, or -1 in case of error, 0 indicate the
496  * end of this entity
497  */
498 int
xmlParserInputGrow(xmlParserInputPtr in,int len)499 xmlParserInputGrow(xmlParserInputPtr in, int len) {
500     int ret;
501     size_t indx;
502 
503     if ((in == NULL) || (len < 0)) return(-1);
504     if (in->buf == NULL) return(-1);
505     if (in->base == NULL) return(-1);
506     if (in->cur == NULL) return(-1);
507     if (in->buf->buffer == NULL) return(-1);
508 
509     /* Don't grow memory buffers. */
510     if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
511         return(0);
512 
513     indx = in->cur - in->base;
514     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
515         return(0);
516     }
517     ret = xmlParserInputBufferGrow(in->buf, len);
518 
519     in->base = xmlBufContent(in->buf->buffer);
520     if (in->base == NULL) {
521         in->base = BAD_CAST "";
522         in->cur = in->base;
523         in->end = in->base;
524         return(-1);
525     }
526     in->cur = in->base + indx;
527     in->end = xmlBufEnd(in->buf->buffer);
528 
529     return(ret);
530 }
531 
532 /**
533  * xmlParserShrink:
534  * @ctxt:  an XML parser context
535  *
536  * Shrink the input buffer.
537  */
538 void
xmlParserShrink(xmlParserCtxtPtr ctxt)539 xmlParserShrink(xmlParserCtxtPtr ctxt) {
540     xmlParserInputPtr in = ctxt->input;
541     xmlParserInputBufferPtr buf = in->buf;
542     size_t used;
543 
544     if (buf == NULL)
545         return;
546     /* Don't shrink pull parser memory buffers. */
547     if ((!PARSER_PROGRESSIVE(ctxt)) &&
548         (buf->encoder == NULL) &&
549         (buf->readcallback == NULL))
550         return;
551 
552     used = in->cur - in->base;
553     /*
554      * Do not shrink on large buffers whose only a tiny fraction
555      * was consumed
556      */
557     if (used > INPUT_CHUNK) {
558 	size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
559 
560 	if (res > 0) {
561             used -= res;
562             if ((res > ULONG_MAX) ||
563                 (in->consumed > ULONG_MAX - (unsigned long)res))
564                 in->consumed = ULONG_MAX;
565             else
566                 in->consumed += res;
567 	}
568     }
569 
570     xmlBufUpdateInput(buf->buffer, in, used);
571 }
572 
573 /**
574  * xmlParserInputShrink:
575  * @in:  an XML parser input
576  *
577  * DEPRECATED: Don't use.
578  *
579  * This function removes used input for the parser.
580  */
581 void
xmlParserInputShrink(xmlParserInputPtr in)582 xmlParserInputShrink(xmlParserInputPtr in) {
583     size_t used;
584     size_t ret;
585 
586     if (in == NULL) return;
587     if (in->buf == NULL) return;
588     if (in->base == NULL) return;
589     if (in->cur == NULL) return;
590     if (in->buf->buffer == NULL) return;
591 
592     used = in->cur - in->base;
593     /*
594      * Do not shrink on large buffers whose only a tiny fraction
595      * was consumed
596      */
597     if (used > INPUT_CHUNK) {
598 	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
599 	if (ret > 0) {
600             used -= ret;
601             if ((ret > ULONG_MAX) ||
602                 (in->consumed > ULONG_MAX - (unsigned long)ret))
603                 in->consumed = ULONG_MAX;
604             else
605                 in->consumed += ret;
606 	}
607     }
608 
609     if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
610         xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
611     }
612 
613     in->base = xmlBufContent(in->buf->buffer);
614     if (in->base == NULL) {
615         /* TODO: raise error */
616         in->base = BAD_CAST "";
617         in->cur = in->base;
618         in->end = in->base;
619         return;
620     }
621     in->cur = in->base + used;
622     in->end = xmlBufEnd(in->buf->buffer);
623 }
624 
625 /************************************************************************
626  *									*
627  *		UTF8 character input and related functions		*
628  *									*
629  ************************************************************************/
630 
631 /**
632  * xmlNextChar:
633  * @ctxt:  the XML parser context
634  *
635  * DEPRECATED: Internal function, do not use.
636  *
637  * Skip to the next char input char.
638  */
639 
640 void
xmlNextChar(xmlParserCtxtPtr ctxt)641 xmlNextChar(xmlParserCtxtPtr ctxt)
642 {
643     const unsigned char *cur;
644     size_t avail;
645     int c;
646 
647     if ((ctxt == NULL) || (ctxt->input == NULL))
648         return;
649 
650     avail = ctxt->input->end - ctxt->input->cur;
651 
652     if (avail < INPUT_CHUNK) {
653         xmlParserGrow(ctxt);
654         if (ctxt->input->cur >= ctxt->input->end)
655             return;
656         avail = ctxt->input->end - ctxt->input->cur;
657     }
658 
659     cur = ctxt->input->cur;
660     c = *cur;
661 
662     if (c < 0x80) {
663         if (c == '\n') {
664             ctxt->input->cur++;
665             ctxt->input->line++;
666             ctxt->input->col = 1;
667         } else if (c == '\r') {
668             /*
669              *   2.11 End-of-Line Handling
670              *   the literal two-character sequence "#xD#xA" or a standalone
671              *   literal #xD, an XML processor must pass to the application
672              *   the single character #xA.
673              */
674             ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
675             ctxt->input->line++;
676             ctxt->input->col = 1;
677             return;
678         } else {
679             ctxt->input->cur++;
680             ctxt->input->col++;
681         }
682     } else {
683         ctxt->input->col++;
684 
685         if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
686             goto encoding_error;
687 
688         if (c < 0xe0) {
689             /* 2-byte code */
690             if (c < 0xc2)
691                 goto encoding_error;
692             ctxt->input->cur += 2;
693         } else {
694             unsigned int val = (c << 8) | cur[1];
695 
696             if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
697                 goto encoding_error;
698 
699             if (c < 0xf0) {
700                 /* 3-byte code */
701                 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
702                     goto encoding_error;
703                 ctxt->input->cur += 3;
704             } else {
705                 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
706                     goto encoding_error;
707 
708                 /* 4-byte code */
709                 if ((val < 0xf090) || (val >= 0xf490))
710                     goto encoding_error;
711                 ctxt->input->cur += 4;
712             }
713         }
714     }
715 
716     return;
717 
718 encoding_error:
719     /* Only report the first error */
720     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
721         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
722         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
723     }
724     ctxt->input->cur++;
725     return;
726 }
727 
728 /**
729  * xmlCurrentChar:
730  * @ctxt:  the XML parser context
731  * @len:  pointer to the length of the char read
732  *
733  * DEPRECATED: Internal function, do not use.
734  *
735  * The current char value, if using UTF-8 this may actually span multiple
736  * bytes in the input buffer. Implement the end of line normalization:
737  * 2.11 End-of-Line Handling
738  * Wherever an external parsed entity or the literal entity value
739  * of an internal parsed entity contains either the literal two-character
740  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
741  * must pass to the application the single character #xA.
742  * This behavior can conveniently be produced by normalizing all
743  * line breaks to #xA on input, before parsing.)
744  *
745  * Returns the current char value and its length
746  */
747 
748 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)749 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
750     const unsigned char *cur;
751     size_t avail;
752     int c;
753 
754     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
755 
756     avail = ctxt->input->end - ctxt->input->cur;
757 
758     if (avail < INPUT_CHUNK) {
759         xmlParserGrow(ctxt);
760         avail = ctxt->input->end - ctxt->input->cur;
761     }
762 
763     cur = ctxt->input->cur;
764     c = *cur;
765 
766     if (c < 0x80) {
767 	/* 1-byte code */
768         if (c < 0x20) {
769             /*
770              *   2.11 End-of-Line Handling
771              *   the literal two-character sequence "#xD#xA" or a standalone
772              *   literal #xD, an XML processor must pass to the application
773              *   the single character #xA.
774              */
775             if (c == '\r') {
776                 /*
777                  * TODO: This function shouldn't change the 'cur' pointer
778                  * as side effect, but the NEXTL macro in parser.c relies
779                  * on this behavior when incrementing line numbers.
780                  */
781                 if (cur[1] == '\n')
782                     ctxt->input->cur++;
783                 *len = 1;
784                 c = '\n';
785             } else if (c == 0) {
786                 if (ctxt->input->cur >= ctxt->input->end) {
787                     *len = 0;
788                 } else {
789                     *len = 1;
790                     /*
791                      * TODO: Null bytes should be handled by callers,
792                      * but this can be tricky.
793                      */
794                     xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
795                             "Char 0x0 out of allowed range\n");
796                 }
797             } else {
798                 *len = 1;
799             }
800         } else {
801             *len = 1;
802         }
803 
804         return(c);
805     } else {
806         int val;
807 
808         if (avail < 2)
809             goto incomplete_sequence;
810         if ((cur[1] & 0xc0) != 0x80)
811             goto encoding_error;
812 
813         if (c < 0xe0) {
814             /* 2-byte code */
815             if (c < 0xc2)
816                 goto encoding_error;
817             val = (c & 0x1f) << 6;
818             val |= cur[1] & 0x3f;
819             *len = 2;
820         } else {
821             if (avail < 3)
822                 goto incomplete_sequence;
823             if ((cur[2] & 0xc0) != 0x80)
824                 goto encoding_error;
825 
826             if (c < 0xf0) {
827                 /* 3-byte code */
828                 val = (c & 0xf) << 12;
829                 val |= (cur[1] & 0x3f) << 6;
830                 val |= cur[2] & 0x3f;
831                 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
832                     goto encoding_error;
833                 *len = 3;
834             } else {
835                 if (avail < 4)
836                     goto incomplete_sequence;
837                 if ((cur[3] & 0xc0) != 0x80)
838                     goto encoding_error;
839 
840                 /* 4-byte code */
841                 val = (c & 0x0f) << 18;
842                 val |= (cur[1] & 0x3f) << 12;
843                 val |= (cur[2] & 0x3f) << 6;
844                 val |= cur[3] & 0x3f;
845                 if ((val < 0x10000) || (val >= 0x110000))
846                     goto encoding_error;
847                 *len = 4;
848             }
849         }
850 
851         return(val);
852     }
853 
854 encoding_error:
855     /* Only report the first error */
856     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
857         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
858         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
859     }
860     *len = 1;
861     return(0xFFFD); /* U+FFFD Replacement Character */
862 
863 incomplete_sequence:
864     /*
865      * An encoding problem may arise from a truncated input buffer
866      * splitting a character in the middle. In that case do not raise
867      * an error but return 0. This should only happen when push parsing
868      * char data.
869      */
870     *len = 0;
871     return(0);
872 }
873 
874 /**
875  * xmlStringCurrentChar:
876  * @ctxt:  the XML parser context
877  * @cur:  pointer to the beginning of the char
878  * @len:  pointer to the length of the char read
879  *
880  * DEPRECATED: Internal function, do not use.
881  *
882  * The current char value, if using UTF-8 this may actually span multiple
883  * bytes in the input buffer.
884  *
885  * Returns the current char value and its length
886  */
887 
888 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)889 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
890                      const xmlChar *cur, int *len) {
891     int c;
892 
893     if ((cur == NULL) || (len == NULL))
894         return(0);
895 
896     /* cur is zero-terminated, so we can lie about its length. */
897     *len = 4;
898     c = xmlGetUTF8Char(cur, len);
899 
900     return((c < 0) ? 0 : c);
901 }
902 
903 /**
904  * xmlCopyCharMultiByte:
905  * @out:  pointer to an array of xmlChar
906  * @val:  the char value
907  *
908  * append the char value in the array
909  *
910  * Returns the number of xmlChar written
911  */
912 int
xmlCopyCharMultiByte(xmlChar * out,int val)913 xmlCopyCharMultiByte(xmlChar *out, int val) {
914     if ((out == NULL) || (val < 0)) return(0);
915     /*
916      * We are supposed to handle UTF8, check it's valid
917      * From rfc2044: encoding of the Unicode values on UTF-8:
918      *
919      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
920      * 0000 0000-0000 007F   0xxxxxxx
921      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
922      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
923      */
924     if  (val >= 0x80) {
925 	xmlChar *savedout = out;
926 	int bits;
927 	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
928 	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
929 	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
930 	else {
931 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
932             fprintf(stderr, "xmlCopyCharMultiByte: codepoint out of range\n");
933             abort();
934 #endif
935 	    return(0);
936 	}
937 	for ( ; bits >= 0; bits-= 6)
938 	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
939 	return (out - savedout);
940     }
941     *out = val;
942     return 1;
943 }
944 
945 /**
946  * xmlCopyChar:
947  * @len:  Ignored, compatibility
948  * @out:  pointer to an array of xmlChar
949  * @val:  the char value
950  *
951  * append the char value in the array
952  *
953  * Returns the number of xmlChar written
954  */
955 
956 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)957 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
958     if ((out == NULL) || (val < 0)) return(0);
959     /* the len parameter is ignored */
960     if  (val >= 0x80) {
961 	return(xmlCopyCharMultiByte (out, val));
962     }
963     *out = val;
964     return 1;
965 }
966 
967 /************************************************************************
968  *									*
969  *		Commodity functions to switch encodings			*
970  *									*
971  ************************************************************************/
972 
973 static int
xmlDetectEBCDIC(xmlParserInputPtr input,xmlCharEncodingHandlerPtr * hout)974 xmlDetectEBCDIC(xmlParserInputPtr input, xmlCharEncodingHandlerPtr *hout) {
975     xmlChar out[200];
976     xmlCharEncodingHandlerPtr handler;
977     int inlen, outlen, res, i;
978 
979     *hout = NULL;
980 
981     /*
982      * To detect the EBCDIC code page, we convert the first 200 bytes
983      * to EBCDIC-US and try to find the encoding declaration.
984      */
985     res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
986     if (res != 0)
987         return(res);
988     outlen = sizeof(out) - 1;
989     inlen = input->end - input->cur;
990     res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
991     /*
992      * Return the EBCDIC handler if decoding failed. The error will
993      * be reported later.
994      */
995     if (res < 0)
996         goto done;
997     out[outlen] = 0;
998 
999     for (i = 0; i < outlen; i++) {
1000         if (out[i] == '>')
1001             break;
1002         if ((out[i] == 'e') &&
1003             (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1004             int start, cur, quote;
1005 
1006             i += 8;
1007             while (IS_BLANK_CH(out[i]))
1008                 i += 1;
1009             if (out[i++] != '=')
1010                 break;
1011             while (IS_BLANK_CH(out[i]))
1012                 i += 1;
1013             quote = out[i++];
1014             if ((quote != '\'') && (quote != '"'))
1015                 break;
1016             start = i;
1017             cur = out[i];
1018             while (((cur >= 'a') && (cur <= 'z')) ||
1019                    ((cur >= 'A') && (cur <= 'Z')) ||
1020                    ((cur >= '0') && (cur <= '9')) ||
1021                    (cur == '.') || (cur == '_') ||
1022                    (cur == '-'))
1023                 cur = out[++i];
1024             if (cur != quote)
1025                 break;
1026             out[i] = 0;
1027             xmlCharEncCloseFunc(handler);
1028             res = xmlOpenCharEncodingHandler((char *) out + start,
1029                                              /* output */ 0, &handler);
1030             if (res != 0)
1031                 return(res);
1032             *hout = handler;
1033             return(0);
1034         }
1035     }
1036 
1037 done:
1038     /*
1039      * Encoding handlers are stateful, so we have to recreate them.
1040      */
1041     xmlCharEncCloseFunc(handler);
1042     res = xmlLookupCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC, &handler);
1043     if (res != 0)
1044         return(res);
1045     *hout = handler;
1046     return(0);
1047 }
1048 
1049 /**
1050  * xmlSwitchEncoding:
1051  * @ctxt:  the parser context
1052  * @enc:  the encoding value (number)
1053  *
1054  * Use encoding specified by enum to decode input data. This overrides
1055  * the encoding found in the XML declaration.
1056  *
1057  * This function can also be used to override the encoding of chunks
1058  * passed to xmlParseChunk.
1059  *
1060  * Returns 0 in case of success, -1 otherwise
1061  */
1062 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1063 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1064 {
1065     xmlCharEncodingHandlerPtr handler = NULL;
1066     int ret;
1067     int res;
1068 
1069     if ((ctxt == NULL) || (ctxt->input == NULL))
1070         return(-1);
1071 
1072     switch (enc) {
1073 	case XML_CHAR_ENCODING_NONE:
1074 	case XML_CHAR_ENCODING_UTF8:
1075         case XML_CHAR_ENCODING_ASCII:
1076             res = 0;
1077             break;
1078         case XML_CHAR_ENCODING_EBCDIC:
1079             res = xmlDetectEBCDIC(ctxt->input, &handler);
1080             break;
1081         default:
1082             res = xmlLookupCharEncodingHandler(enc, &handler);
1083             break;
1084     }
1085 
1086     if (res != 0) {
1087         const char *name = xmlGetCharEncodingName(enc);
1088 
1089         xmlFatalErr(ctxt, res, (name ? name : "<null>"));
1090         return(-1);
1091     }
1092 
1093     ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1094 
1095     if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1096         ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1097     }
1098 
1099     return(ret);
1100 }
1101 
1102 /**
1103  * xmlSwitchInputEncodingName:
1104  * @ctxt:  the parser context, only for error reporting
1105  * @input:  the input strea,
1106  * @encoding:  the encoding name
1107  *
1108  * Returns 0 in case of success, -1 otherwise
1109  */
1110 static int
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,const char * encoding)1111 xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1112                            const char *encoding) {
1113     xmlCharEncodingHandlerPtr handler;
1114     int res;
1115 
1116     if (encoding == NULL)
1117         return(-1);
1118 
1119     res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1120     if (res != 0) {
1121         xmlFatalErr(ctxt, res, encoding);
1122         return(-1);
1123     }
1124 
1125     return(xmlSwitchInputEncoding(ctxt, input, handler));
1126 }
1127 
1128 /**
1129  * xmlSwitchEncodingName:
1130  * @ctxt:  the parser context
1131  * @encoding:  the encoding name
1132  *
1133  * Use specified encoding to decode input data. This overrides the
1134  * encoding found in the XML declaration.
1135  *
1136  * This function can also be used to override the encoding of chunks
1137  * passed to xmlParseChunk.
1138  *
1139  * Available since 2.13.0.
1140  *
1141  * Returns 0 in case of success, -1 otherwise
1142  */
1143 int
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt,const char * encoding)1144 xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
1145     if (ctxt == NULL)
1146         return(-1);
1147 
1148     return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
1149 }
1150 
1151 /**
1152  * xmlInputSetEncodingHandler:
1153  * @input:  the input stream
1154  * @handler:  the encoding handler
1155  *
1156  * Use encoding handler to decode input data.
1157  *
1158  * Closes the handler on error.
1159  *
1160  * Returns an xmlParserErrors code.
1161  */
1162 static int
xmlInputSetEncodingHandler(xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1163 xmlInputSetEncodingHandler(xmlParserInputPtr input,
1164                            xmlCharEncodingHandlerPtr handler) {
1165     int nbchars;
1166     xmlParserInputBufferPtr in;
1167 
1168     if ((input == NULL) || (input->buf == NULL)) {
1169         xmlCharEncCloseFunc(handler);
1170 	return(XML_ERR_ARGUMENT);
1171     }
1172     in = input->buf;
1173 
1174     input->flags |= XML_INPUT_HAS_ENCODING;
1175 
1176     /*
1177      * UTF-8 requires no encoding handler.
1178      */
1179     if ((handler != NULL) &&
1180         (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1181         xmlCharEncCloseFunc(handler);
1182         handler = NULL;
1183     }
1184 
1185     if (in->encoder == handler)
1186         return(XML_ERR_OK);
1187 
1188     if (in->encoder != NULL) {
1189         /*
1190          * Switching encodings during parsing is a really bad idea,
1191          * but Chromium can switch between ISO-8859-1 and UTF-16 before
1192          * separate calls to xmlParseChunk.
1193          *
1194          * TODO: We should check whether the "raw" input buffer is empty and
1195          * convert the old content using the old encoder.
1196          */
1197 
1198         xmlCharEncCloseFunc(in->encoder);
1199         in->encoder = handler;
1200         return(XML_ERR_OK);
1201     }
1202 
1203     in->encoder = handler;
1204 
1205     /*
1206      * Is there already some content down the pipe to convert ?
1207      */
1208     if (xmlBufIsEmpty(in->buffer) == 0) {
1209         xmlBufPtr buf;
1210         size_t processed;
1211 
1212         buf = xmlBufCreate();
1213         if (buf == NULL)
1214             return(XML_ERR_NO_MEMORY);
1215 
1216         /*
1217          * Shrink the current input buffer.
1218          * Move it as the raw buffer and create a new input buffer
1219          */
1220         processed = input->cur - input->base;
1221         xmlBufShrink(in->buffer, processed);
1222         input->consumed += processed;
1223         in->raw = in->buffer;
1224         in->buffer = buf;
1225         in->rawconsumed = processed;
1226 
1227         nbchars = xmlCharEncInput(in);
1228         xmlBufResetInput(in->buffer, input);
1229         if (nbchars < 0)
1230             return(in->error);
1231     }
1232 
1233     return(XML_ERR_OK);
1234 }
1235 
1236 /**
1237  * xmlInputSetEncoding:
1238  * @input:  the input stream
1239  * @encoding:  the encoding name
1240  *
1241  * Use specified encoding to decode input data. This overrides the
1242  * encoding found in the XML declaration.
1243  *
1244  * Available since 2.14.0.
1245  *
1246  * Returns an xmlParserErrors code.
1247  */
1248 int
xmlInputSetEncoding(xmlParserInputPtr input,const char * encoding)1249 xmlInputSetEncoding(xmlParserInputPtr input, const char *encoding) {
1250     xmlCharEncodingHandlerPtr handler;
1251     int res;
1252 
1253     if (encoding == NULL)
1254         return(XML_ERR_ARGUMENT);
1255 
1256     res = xmlOpenCharEncodingHandler(encoding, /* output */ 0, &handler);
1257     if (res != 0)
1258         return(res);
1259 
1260     return(xmlInputSetEncodingHandler(input, handler));
1261 }
1262 
1263 /**
1264  * xmlSwitchInputEncoding:
1265  * @ctxt:  the parser context, only for error reporting
1266  * @input:  the input stream
1267  * @handler:  the encoding handler
1268  *
1269  * DEPRECATED: Internal function, don't use.
1270  *
1271  * Use encoding handler to decode input data.
1272  *
1273  * Returns 0 in case of success, -1 otherwise
1274  */
1275 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1276 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1277                        xmlCharEncodingHandlerPtr handler) {
1278     int code = xmlInputSetEncodingHandler(input, handler);
1279 
1280     if (code != XML_ERR_OK) {
1281         xmlCtxtErrIO(ctxt, code, NULL);
1282         return(-1);
1283     }
1284 
1285     return(0);
1286 }
1287 
1288 /**
1289  * xmlSwitchToEncoding:
1290  * @ctxt:  the parser context
1291  * @handler:  the encoding handler
1292  *
1293  * Use encoding handler to decode input data.
1294  *
1295  * This function can be used to enforce the encoding of chunks passed
1296  * to xmlParseChunk.
1297  *
1298  * Returns 0 in case of success, -1 otherwise
1299  */
1300 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1301 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1302 {
1303     if (ctxt == NULL)
1304         return(-1);
1305     return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1306 }
1307 
1308 /**
1309  * xmlDetectEncoding:
1310  * @ctxt:  the parser context
1311  *
1312  * Handle optional BOM, detect and switch to encoding.
1313  *
1314  * Assumes that there are at least four bytes in the input buffer.
1315  */
1316 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1317 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1318     const xmlChar *in;
1319     xmlCharEncoding enc;
1320     int bomSize;
1321     int autoFlag = 0;
1322 
1323     if (xmlParserGrow(ctxt) < 0)
1324         return;
1325     in = ctxt->input->cur;
1326     if (ctxt->input->end - in < 4)
1327         return;
1328 
1329     if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1330         /*
1331          * If the encoding was already set, only skip the BOM which was
1332          * possibly decoded to UTF-8.
1333          */
1334         if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1335             ctxt->input->cur += 3;
1336         }
1337 
1338         return;
1339     }
1340 
1341     enc = XML_CHAR_ENCODING_NONE;
1342     bomSize = 0;
1343 
1344     switch (in[0]) {
1345         case 0x00:
1346             if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1347                 enc = XML_CHAR_ENCODING_UCS4BE;
1348                 autoFlag = XML_INPUT_AUTO_OTHER;
1349             } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1350                 enc = XML_CHAR_ENCODING_UTF16BE;
1351                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1352             }
1353             break;
1354 
1355         case 0x3C:
1356             if (in[1] == 0x00) {
1357                 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1358                     enc = XML_CHAR_ENCODING_UCS4LE;
1359                     autoFlag = XML_INPUT_AUTO_OTHER;
1360                 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1361                     enc = XML_CHAR_ENCODING_UTF16LE;
1362                     autoFlag = XML_INPUT_AUTO_UTF16LE;
1363                 }
1364             }
1365             break;
1366 
1367         case 0x4C:
1368 	    if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1369 	        enc = XML_CHAR_ENCODING_EBCDIC;
1370                 autoFlag = XML_INPUT_AUTO_OTHER;
1371             }
1372             break;
1373 
1374         case 0xEF:
1375             if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1376                 enc = XML_CHAR_ENCODING_UTF8;
1377                 autoFlag = XML_INPUT_AUTO_UTF8;
1378                 bomSize = 3;
1379             }
1380             break;
1381 
1382         case 0xFE:
1383             if (in[1] == 0xFF) {
1384                 enc = XML_CHAR_ENCODING_UTF16BE;
1385                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1386                 bomSize = 2;
1387             }
1388             break;
1389 
1390         case 0xFF:
1391             if (in[1] == 0xFE) {
1392                 enc = XML_CHAR_ENCODING_UTF16LE;
1393                 autoFlag = XML_INPUT_AUTO_UTF16LE;
1394                 bomSize = 2;
1395             }
1396             break;
1397     }
1398 
1399     if (bomSize > 0) {
1400         ctxt->input->cur += bomSize;
1401     }
1402 
1403     if (enc != XML_CHAR_ENCODING_NONE) {
1404         ctxt->input->flags |= autoFlag;
1405         xmlSwitchEncoding(ctxt, enc);
1406     }
1407 }
1408 
1409 /**
1410  * xmlSetDeclaredEncoding:
1411  * @ctxt:  the parser context
1412  * @encoding:  declared encoding
1413  *
1414  * Set the encoding from a declaration in the document.
1415  *
1416  * If no encoding was set yet, switch the encoding. Otherwise, only warn
1417  * about encoding mismatches.
1418  *
1419  * Takes ownership of 'encoding'.
1420  */
1421 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1422 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1423     if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1424         ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1425         xmlSwitchEncodingName(ctxt, (const char *) encoding);
1426         ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1427     } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1428         static const char *allowedUTF8[] = {
1429             "UTF-8", "UTF8", NULL
1430         };
1431         static const char *allowedUTF16LE[] = {
1432             "UTF-16", "UTF-16LE", "UTF16", NULL
1433         };
1434         static const char *allowedUTF16BE[] = {
1435             "UTF-16", "UTF-16BE", "UTF16", NULL
1436         };
1437         const char **allowed = NULL;
1438         const char *autoEnc = NULL;
1439 
1440         switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1441             case XML_INPUT_AUTO_UTF8:
1442                 allowed = allowedUTF8;
1443                 autoEnc = "UTF-8";
1444                 break;
1445             case XML_INPUT_AUTO_UTF16LE:
1446                 allowed = allowedUTF16LE;
1447                 autoEnc = "UTF-16LE";
1448                 break;
1449             case XML_INPUT_AUTO_UTF16BE:
1450                 allowed = allowedUTF16BE;
1451                 autoEnc = "UTF-16BE";
1452                 break;
1453         }
1454 
1455         if (allowed != NULL) {
1456             const char **p;
1457             int match = 0;
1458 
1459             for (p = allowed; *p != NULL; p++) {
1460                 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1461                     match = 1;
1462                     break;
1463                 }
1464             }
1465 
1466             if (match == 0) {
1467                 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1468                               "Encoding '%s' doesn't match "
1469                               "auto-detected '%s'\n",
1470                               encoding, BAD_CAST autoEnc);
1471                 xmlFree(encoding);
1472                 encoding = xmlStrdup(BAD_CAST autoEnc);
1473                 if (encoding == NULL)
1474                     xmlCtxtErrMemory(ctxt);
1475             }
1476         }
1477     }
1478 
1479     if (ctxt->encoding != NULL)
1480         xmlFree((xmlChar *) ctxt->encoding);
1481     ctxt->encoding = encoding;
1482 }
1483 
1484 /**
1485  * xmlGetActualEncoding:
1486  * @ctxt:  the parser context
1487  *
1488  * Returns the actual used to parse the document. This can differ from
1489  * the declared encoding.
1490  */
1491 const xmlChar *
xmlGetActualEncoding(xmlParserCtxtPtr ctxt)1492 xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1493     const xmlChar *encoding = NULL;
1494 
1495     if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1496         (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1497         /* Preserve encoding exactly */
1498         encoding = ctxt->encoding;
1499     } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1500         encoding = BAD_CAST ctxt->input->buf->encoder->name;
1501     } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1502         encoding = BAD_CAST "UTF-8";
1503     }
1504 
1505     return(encoding);
1506 }
1507 
1508 /************************************************************************
1509  *									*
1510  *	Commodity functions to handle entities processing		*
1511  *									*
1512  ************************************************************************/
1513 
1514 /**
1515  * xmlFreeInputStream:
1516  * @input:  an xmlParserInputPtr
1517  *
1518  * Free up an input stream.
1519  */
1520 void
xmlFreeInputStream(xmlParserInputPtr input)1521 xmlFreeInputStream(xmlParserInputPtr input) {
1522     if (input == NULL) return;
1523 
1524     if (input->filename != NULL) xmlFree((char *) input->filename);
1525     if (input->version != NULL) xmlFree((char *) input->version);
1526     if ((input->free != NULL) && (input->base != NULL))
1527         input->free((xmlChar *) input->base);
1528     if (input->buf != NULL)
1529         xmlFreeParserInputBuffer(input->buf);
1530     xmlFree(input);
1531 }
1532 
1533 /**
1534  * xmlNewInputStream:
1535  * @ctxt:  an XML parser context
1536  *
1537  * Create a new input stream structure.
1538  *
1539  * Returns the new input stream or NULL
1540  */
1541 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1542 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1543     xmlParserInputPtr input;
1544 
1545     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1546     if (input == NULL) {
1547         xmlCtxtErrMemory(ctxt);
1548 	return(NULL);
1549     }
1550     memset(input, 0, sizeof(xmlParserInput));
1551     input->line = 1;
1552     input->col = 1;
1553 
1554     return(input);
1555 }
1556 
1557 /**
1558  * xmlNewInputURL:
1559  * @ctxt:  parser context
1560  * @url:  filename or URL
1561  * @publicId:  publid ID from doctype (optional)
1562  * @encoding:  character encoding (optional)
1563  * @flags:  unused, pass 0
1564  *
1565  * Creates a new parser input from the filesystem, the network or
1566  * a user-defined resource loader.
1567  *
1568  * Returns a new parser input.
1569  */
1570 xmlParserInputPtr
xmlNewInputURL(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,const char * encoding,int flags ATTRIBUTE_UNUSED)1571 xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
1572                const char *encoding, int flags ATTRIBUTE_UNUSED) {
1573     xmlParserInputPtr input;
1574 
1575     if ((ctxt == NULL) || (url == NULL))
1576 	return(NULL);
1577 
1578     input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
1579     if (input == NULL)
1580         return(NULL);
1581 
1582     if (encoding != NULL)
1583         xmlSwitchInputEncodingName(ctxt, input, encoding);
1584 
1585     return(input);
1586 }
1587 
1588 /**
1589  * xmlNewInputInternal:
1590  * @buf:  parser input buffer
1591  * @filename:  filename or URL
1592  *
1593  * Internal helper function.
1594  *
1595  * Returns a new parser input.
1596  */
1597 static xmlParserInputPtr
xmlNewInputInternal(xmlParserInputBufferPtr buf,const char * filename)1598 xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
1599     xmlParserInputPtr input;
1600 
1601     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1602     if (input == NULL) {
1603 	xmlFreeParserInputBuffer(buf);
1604 	return(NULL);
1605     }
1606     memset(input, 0, sizeof(xmlParserInput));
1607     input->line = 1;
1608     input->col = 1;
1609 
1610     input->buf = buf;
1611     xmlBufResetInput(input->buf->buffer, input);
1612 
1613     if (filename != NULL) {
1614         input->filename = xmlMemStrdup(filename);
1615         if (input->filename == NULL) {
1616             xmlFreeInputStream(input);
1617             return(NULL);
1618         }
1619     }
1620 
1621     return(input);
1622 }
1623 
1624 /**
1625  * xmlInputCreateMemory:
1626  * @url:  base URL (optional)
1627  * @mem:  pointer to char array
1628  * @size:  size of array
1629  * @flags:  optimization hints
1630  *
1631  * Creates a new parser input to read from a memory area.
1632  *
1633  * @url is used as base to resolve external entities and for
1634  * error reporting.
1635  *
1636  * If the XML_INPUT_BUF_STATIC flag is set, the memory area must
1637  * stay unchanged until parsing has finished. This can avoid
1638  * temporary copies.
1639  *
1640  * If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
1641  * area must contain a zero byte after the buffer at position @size.
1642  * This can avoid temporary copies.
1643  *
1644  * Available since 2.14.0.
1645  *
1646  * Returns a new parser input or NULL if a memory allocation failed.
1647  */
1648 xmlParserInputPtr
xmlInputCreateMemory(const char * url,const void * mem,size_t size,int flags)1649 xmlInputCreateMemory(const char *url, const void *mem, size_t size,
1650                      int flags) {
1651     xmlParserInputBufferPtr buf;
1652 
1653     if (mem == NULL)
1654 	return(NULL);
1655 
1656     buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
1657     if (buf == NULL)
1658         return(NULL);
1659 
1660     return(xmlNewInputInternal(buf, url));
1661 }
1662 
1663 /**
1664  * xmlNewInputMemory:
1665  * @ctxt:  parser context
1666  * @url:  base URL (optional)
1667  * @mem:  pointer to char array
1668  * @size:  size of array
1669  * @encoding:  character encoding (optional)
1670  * @flags:  optimization hints
1671  *
1672  * Returns a new parser input or NULL in case of error.
1673  */
1674 xmlParserInputPtr
xmlNewInputMemory(xmlParserCtxtPtr ctxt,const char * url,const void * mem,size_t size,const char * encoding,int flags)1675 xmlNewInputMemory(xmlParserCtxtPtr ctxt, const char *url,
1676                   const void *mem, size_t size,
1677                   const char *encoding, int flags) {
1678     xmlParserInputPtr input;
1679 
1680     if ((ctxt == NULL) || (mem == NULL))
1681 	return(NULL);
1682 
1683     input = xmlInputCreateMemory(url, mem, size, flags);
1684     if (input == NULL) {
1685         xmlCtxtErrMemory(ctxt);
1686         return(NULL);
1687     }
1688 
1689     if (encoding != NULL)
1690         xmlSwitchInputEncodingName(ctxt, input, encoding);
1691 
1692     return(input);
1693 }
1694 
1695 /**
1696  * xmlInputCreateString:
1697  * @url:  base URL (optional)
1698  * @str:  zero-terminated string
1699  * @flags:  optimization hints
1700  *
1701  * Creates a new parser input to read from a zero-terminated string.
1702  *
1703  * @url is used as base to resolve external entities and for
1704  * error reporting.
1705  *
1706  * If the XML_INPUT_BUF_STATIC flag is set, the string must
1707  * stay unchanged until parsing has finished. This can avoid
1708  * temporary copies.
1709  *
1710  * Available since 2.14.0.
1711  *
1712  * Returns a new parser input or NULL if a memory allocation failed.
1713  */
1714 xmlParserInputPtr
xmlInputCreateString(const char * url,const char * str,int flags)1715 xmlInputCreateString(const char *url, const char *str, int flags) {
1716     xmlParserInputBufferPtr buf;
1717 
1718     if (str == NULL)
1719 	return(NULL);
1720 
1721     buf = xmlNewInputBufferString(str, flags);
1722     if (buf == NULL)
1723         return(NULL);
1724 
1725     return(xmlNewInputInternal(buf, url));
1726 }
1727 
1728 /**
1729  * xmlNewInputString:
1730  * @ctxt:  parser context
1731  * @url:  base URL (optional)
1732  * @str:  zero-terminated string
1733  * @encoding:  character encoding (optional)
1734  * @flags:  optimization hints
1735  *
1736  * Returns a new parser input.
1737  */
1738 xmlParserInputPtr
xmlNewInputString(xmlParserCtxtPtr ctxt,const char * url,const char * str,const char * encoding,int flags)1739 xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url,
1740                   const char *str, const char *encoding, int flags) {
1741     xmlParserInputPtr input;
1742 
1743     if ((ctxt == NULL) || (str == NULL))
1744 	return(NULL);
1745 
1746     input = xmlInputCreateString(url, str, flags);
1747     if (input == NULL) {
1748         xmlCtxtErrMemory(ctxt);
1749         return(NULL);
1750     }
1751 
1752     if (encoding != NULL)
1753         xmlSwitchInputEncodingName(ctxt, input, encoding);
1754 
1755     return(input);
1756 }
1757 
1758 /**
1759  * xmlInputCreateFd:
1760  * @url:  base URL (optional)
1761  * @fd:  file descriptor
1762  * @flags:  unused, pass 0
1763  *
1764  * Creates a new parser input to read from a zero-terminated string.
1765  *
1766  * @url is used as base to resolve external entities and for
1767  * error reporting.
1768  *
1769  * @fd is closed after parsing has finished.
1770  *
1771  * Available since 2.14.0.
1772  *
1773  * Returns a new parser input or NULL if a memory allocation failed.
1774  */
1775 xmlParserInputPtr
xmlInputCreateFd(const char * url,int fd,int flags ATTRIBUTE_UNUSED)1776 xmlInputCreateFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
1777     xmlParserInputBufferPtr buf;
1778 
1779     if (fd < 0)
1780 	return(NULL);
1781 
1782     buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
1783     if (buf == NULL)
1784         return(NULL);
1785 
1786     return(xmlNewInputInternal(buf, url));
1787 }
1788 
1789 /**
1790  * xmlNewInputFd:
1791  * @ctxt:  parser context
1792  * @url:  base URL (optional)
1793  * @fd:  file descriptor
1794  * @encoding:  character encoding (optional)
1795  * @flags:  unused, pass 0
1796  *
1797  * Returns a new parser input.
1798  */
1799 xmlParserInputPtr
xmlNewInputFd(xmlParserCtxtPtr ctxt,const char * url,int fd,const char * encoding,int flags)1800 xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url,
1801               int fd, const char *encoding, int flags) {
1802     xmlParserInputPtr input;
1803 
1804     if ((ctxt == NULL) || (fd < 0))
1805 	return(NULL);
1806 
1807     input = xmlInputCreateFd(url, fd, flags);
1808     if (input == NULL) {
1809 	xmlCtxtErrMemory(ctxt);
1810         return(NULL);
1811     }
1812 
1813     if (encoding != NULL)
1814         xmlSwitchInputEncodingName(ctxt, input, encoding);
1815 
1816     return(input);
1817 }
1818 
1819 /**
1820  * xmlInputCreateIO:
1821  * @url:  base URL (optional)
1822  * @ioRead:  read callback
1823  * @ioClose:  close callback (optional)
1824  * @ioCtxt:  IO context
1825  * @flags:  unused, pass 0
1826  *
1827  * Creates a new parser input to read from input callbacks and
1828  * cintext.
1829  *
1830  * @url is used as base to resolve external entities and for
1831  * error reporting.
1832  *
1833  * @ioRead is called to read new data into a provided buffer.
1834  * It must return the number of bytes written into the buffer
1835  * ot a negative xmlParserErrors code on failure.
1836  *
1837  * @ioClose is called after parsing has finished.
1838  *
1839  * @ioCtxt is an opaque pointer passed to the callbacks.
1840  *
1841  * Available since 2.14.0.
1842  *
1843  * Returns a new parser input or NULL if a memory allocation failed.
1844  */
1845 xmlParserInputPtr
xmlInputCreateIO(const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,int flags ATTRIBUTE_UNUSED)1846 xmlInputCreateIO(const char *url, xmlInputReadCallback ioRead,
1847                  xmlInputCloseCallback ioClose, void *ioCtxt,
1848                  int flags ATTRIBUTE_UNUSED) {
1849     xmlParserInputBufferPtr buf;
1850 
1851     if (ioRead == NULL)
1852 	return(NULL);
1853 
1854     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1855     if (buf == NULL) {
1856         if (ioClose != NULL)
1857             ioClose(ioCtxt);
1858         return(NULL);
1859     }
1860 
1861     buf->context = ioCtxt;
1862     buf->readcallback = ioRead;
1863     buf->closecallback = ioClose;
1864 
1865     return(xmlNewInputInternal(buf, url));
1866 }
1867 
1868 /**
1869  * xmlNewInputIO:
1870  * @ctxt:  parser context
1871  * @url:  base URL (optional)
1872  * @ioRead:  read callback
1873  * @ioClose:  close callback (optional)
1874  * @ioCtxt:  IO context
1875  * @encoding:  character encoding (optional)
1876  * @flags:  unused, pass 0
1877  *
1878  * Returns a new parser input.
1879  */
1880 xmlParserInputPtr
xmlNewInputIO(xmlParserCtxtPtr ctxt,const char * url,xmlInputReadCallback ioRead,xmlInputCloseCallback ioClose,void * ioCtxt,const char * encoding,int flags)1881 xmlNewInputIO(xmlParserCtxtPtr ctxt, const char *url,
1882               xmlInputReadCallback ioRead, xmlInputCloseCallback ioClose,
1883               void *ioCtxt, const char *encoding, int flags) {
1884     xmlParserInputPtr input;
1885 
1886     if ((ctxt == NULL) || (ioRead == NULL))
1887 	return(NULL);
1888 
1889     input = xmlInputCreateIO(url, ioRead, ioClose, ioCtxt, flags);
1890     if (input == NULL) {
1891         xmlCtxtErrMemory(ctxt);
1892         return(NULL);
1893     }
1894 
1895     if (encoding != NULL)
1896         xmlSwitchInputEncodingName(ctxt, input, encoding);
1897 
1898     return(input);
1899 }
1900 
1901 /**
1902  * xmlInputCreatePush:
1903  * @url:  base URL (optional)
1904  * @chunk:  pointer to char array
1905  * @size:  size of array
1906  *
1907  * Creates a new parser input for a push parser.
1908  *
1909  * Returns a new parser input or NULL if a memory allocation failed.
1910  */
1911 xmlParserInputPtr
xmlInputCreatePush(const char * url,const char * chunk,int size)1912 xmlInputCreatePush(const char *url, const char *chunk, int size) {
1913     xmlParserInputBufferPtr buf;
1914     xmlParserInputPtr input;
1915 
1916     buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
1917     if (buf == NULL)
1918         return(NULL);
1919 
1920     input = xmlNewInputInternal(buf, url);
1921     if (input == NULL)
1922 	return(NULL);
1923 
1924     input->flags |= XML_INPUT_PROGRESSIVE;
1925 
1926     if ((size > 0) && (chunk != NULL)) {
1927         int res;
1928 
1929 	res = xmlParserInputBufferPush(input->buf, size, chunk);
1930         xmlBufResetInput(input->buf->buffer, input);
1931         if (res < 0) {
1932             xmlFreeInputStream(input);
1933             return(NULL);
1934         }
1935     }
1936 
1937     return(input);
1938 }
1939 
1940 /**
1941  * xmlNewIOInputStream:
1942  * @ctxt:  an XML parser context
1943  * @buf:  an input buffer
1944  * @enc:  the charset encoding if known
1945  *
1946  * Create a new input stream structure encapsulating the @input into
1947  * a stream suitable for the parser.
1948  *
1949  * Returns the new input stream or NULL
1950  */
1951 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr buf,xmlCharEncoding enc)1952 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
1953 	            xmlCharEncoding enc) {
1954     xmlParserInputPtr input;
1955     const char *encoding;
1956 
1957     if (buf == NULL)
1958         return(NULL);
1959 
1960     input = xmlNewInputInternal(buf, NULL);
1961     if (input == NULL) {
1962         xmlCtxtErrMemory(ctxt);
1963 	return(NULL);
1964     }
1965 
1966     encoding = xmlGetCharEncodingName(enc);
1967     if (encoding != NULL)
1968         xmlSwitchInputEncodingName(ctxt, input, encoding);
1969 
1970     return(input);
1971 }
1972 
1973 /**
1974  * xmlNewEntityInputStream:
1975  * @ctxt:  an XML parser context
1976  * @ent:  an Entity pointer
1977  *
1978  * DEPRECATED: Internal function, do not use.
1979  *
1980  * Create a new input stream based on an xmlEntityPtr
1981  *
1982  * Returns the new input stream or NULL
1983  */
1984 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)1985 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
1986     xmlParserInputPtr input;
1987 
1988     if ((ctxt == NULL) || (ent == NULL))
1989 	return(NULL);
1990 
1991     if (ent->content != NULL) {
1992         input = xmlNewInputString(ctxt, NULL, (const char *) ent->content,
1993                                   NULL, XML_INPUT_BUF_STATIC);
1994     } else if (ent->URI != NULL) {
1995         xmlResourceType rtype;
1996 
1997         if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
1998             rtype = XML_RESOURCE_PARAMETER_ENTITY;
1999         else
2000             rtype = XML_RESOURCE_GENERAL_ENTITY;
2001 
2002         input = xmlLoadResource(ctxt, (char *) ent->URI,
2003                                 (char *) ent->ExternalID, rtype);
2004     } else {
2005         return(NULL);
2006     }
2007 
2008     if (input == NULL)
2009         return(NULL);
2010 
2011     input->entity = ent;
2012 
2013     return(input);
2014 }
2015 
2016 /**
2017  * xmlNewStringInputStream:
2018  * @ctxt:  an XML parser context
2019  * @buffer:  an memory buffer
2020  *
2021  * Create a new input stream based on a memory buffer.
2022  *
2023  * Returns the new input stream
2024  */
2025 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)2026 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
2027     return(xmlNewInputString(ctxt, NULL, (const char *) buffer, NULL, 0));
2028 }
2029 
2030 
2031 /****************************************************************
2032  *								*
2033  *		External entities loading			*
2034  *								*
2035  ****************************************************************/
2036 
2037 #ifdef LIBXML_CATALOG_ENABLED
2038 
2039 /**
2040  * xmlResolveResourceFromCatalog:
2041  * @URL:  the URL for the entity to load
2042  * @ID:  the System ID for the entity to load
2043  * @ctxt:  the context in which the entity is called or NULL
2044  *
2045  * Resolves the URL and ID against the appropriate catalog.
2046  * This function is used by xmlDefaultExternalEntityLoader and
2047  * xmlNoNetExternalEntityLoader.
2048  *
2049  * Returns a new allocated URL, or NULL.
2050  */
2051 static xmlChar *
xmlResolveResourceFromCatalog(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2052 xmlResolveResourceFromCatalog(const char *URL, const char *ID,
2053                               xmlParserCtxtPtr ctxt) {
2054     xmlChar *resource = NULL;
2055     xmlCatalogAllow pref;
2056 
2057     /*
2058      * If the resource doesn't exists as a file,
2059      * try to load it from the resource pointed in the catalogs
2060      */
2061     pref = xmlCatalogGetDefaults();
2062 
2063     if ((pref != XML_CATA_ALLOW_NONE) && (!xmlNoNetExists(URL))) {
2064 	/*
2065 	 * Do a local lookup
2066 	 */
2067 	if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2068 	    ((pref == XML_CATA_ALLOW_ALL) ||
2069 	     (pref == XML_CATA_ALLOW_DOCUMENT))) {
2070 	    resource = xmlCatalogLocalResolve(ctxt->catalogs,
2071 					      (const xmlChar *)ID,
2072 					      (const xmlChar *)URL);
2073         }
2074 	/*
2075 	 * Try a global lookup
2076 	 */
2077 	if ((resource == NULL) &&
2078 	    ((pref == XML_CATA_ALLOW_ALL) ||
2079 	     (pref == XML_CATA_ALLOW_GLOBAL))) {
2080 	    resource = xmlCatalogResolve((const xmlChar *)ID,
2081 					 (const xmlChar *)URL);
2082 	}
2083 	if ((resource == NULL) && (URL != NULL))
2084 	    resource = xmlStrdup((const xmlChar *) URL);
2085 
2086 	/*
2087 	 * TODO: do an URI lookup on the reference
2088 	 */
2089 	if ((resource != NULL) && (!xmlNoNetExists((const char *)resource))) {
2090 	    xmlChar *tmp = NULL;
2091 
2092 	    if ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
2093 		((pref == XML_CATA_ALLOW_ALL) ||
2094 		 (pref == XML_CATA_ALLOW_DOCUMENT))) {
2095 		tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
2096 	    }
2097 	    if ((tmp == NULL) &&
2098 		((pref == XML_CATA_ALLOW_ALL) ||
2099 	         (pref == XML_CATA_ALLOW_GLOBAL))) {
2100 		tmp = xmlCatalogResolveURI(resource);
2101 	    }
2102 
2103 	    if (tmp != NULL) {
2104 		xmlFree(resource);
2105 		resource = tmp;
2106 	    }
2107 	}
2108     }
2109 
2110     return resource;
2111 }
2112 
2113 #endif
2114 
2115 /**
2116  * xmlCheckHTTPInput:
2117  * @ctxt: an XML parser context
2118  * @ret: an XML parser input
2119  *
2120  * DEPRECATED: Internal function, don't use.
2121  *
2122  * Check an input in case it was created from an HTTP stream, in that
2123  * case it will handle encoding and update of the base URL in case of
2124  * redirection. It also checks for HTTP errors in which case the input
2125  * is cleanly freed up and an appropriate error is raised in context
2126  *
2127  * Returns the input or NULL in case of HTTP error.
2128  */
2129 xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr ret)2130 xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
2131     /* Avoid unused variable warning if features are disabled. */
2132     (void) ctxt;
2133 
2134 #ifdef LIBXML_HTTP_ENABLED
2135     if ((ret != NULL) && (ret->buf != NULL) &&
2136         (ret->buf->readcallback == xmlIOHTTPRead) &&
2137         (ret->buf->context != NULL)) {
2138         const char *encoding;
2139         const char *redir;
2140         const char *mime;
2141         int code;
2142 
2143         code = xmlNanoHTTPReturnCode(ret->buf->context);
2144         if (code >= 400) {
2145             /* fatal error */
2146 	    if (ret->filename != NULL)
2147                 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
2148 	    else
2149                 xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
2150             xmlFreeInputStream(ret);
2151             ret = NULL;
2152         } else {
2153 
2154             mime = xmlNanoHTTPMimeType(ret->buf->context);
2155             if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
2156                 (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
2157                 encoding = xmlNanoHTTPEncoding(ret->buf->context);
2158                 if (encoding != NULL)
2159                     xmlSwitchEncodingName(ctxt, encoding);
2160 #if 0
2161             } else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
2162 #endif
2163             }
2164             redir = xmlNanoHTTPRedir(ret->buf->context);
2165             if (redir != NULL) {
2166                 if (ret->filename != NULL)
2167                     xmlFree((xmlChar *) ret->filename);
2168                 ret->filename =
2169                     (char *) xmlStrdup((const xmlChar *) redir);
2170             }
2171         }
2172     }
2173 #endif
2174     return(ret);
2175 }
2176 
2177 /**
2178  * xmlInputCreateUrl:
2179  * @filename:  the filename to use as entity
2180  * @flags:  XML_INPUT flags
2181  * @out:  pointer to new parser input
2182  *
2183  * Create a new input stream based on a file or a URL.
2184  *
2185  * The flag XML_INPUT_UNZIP allows decompression.
2186  *
2187  * The flag XML_INPUT_NETWORK allows network access.
2188  *
2189  * Available since 2.14.0.
2190  *
2191  * Returns an xmlParserErrors code.
2192  */
2193 int
xmlInputCreateUrl(const char * filename,int flags,xmlParserInputPtr * out)2194 xmlInputCreateUrl(const char *filename, int flags, xmlParserInputPtr *out) {
2195     xmlParserInputBufferPtr buf;
2196     xmlParserInputPtr input;
2197     int code = XML_ERR_OK;
2198 
2199     if (out == NULL)
2200         return(XML_ERR_ARGUMENT);
2201     *out = NULL;
2202     if (filename == NULL)
2203         return(XML_ERR_ARGUMENT);
2204 
2205     if (xmlParserInputBufferCreateFilenameValue != NULL) {
2206         buf = xmlParserInputBufferCreateFilenameValue(filename,
2207                 XML_CHAR_ENCODING_NONE);
2208         if (buf == NULL)
2209             code = XML_IO_ENOENT;
2210     } else {
2211         code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
2212                                              flags, &buf);
2213     }
2214     if (code != XML_ERR_OK)
2215 	return(code);
2216 
2217     input = xmlNewInputInternal(buf, filename);
2218     if (input == NULL)
2219 	return(XML_ERR_NO_MEMORY);
2220 
2221     /*input = xmlCheckHTTPInput(ctxt, input);*/
2222 
2223     *out = input;
2224     return(XML_ERR_OK);
2225 }
2226 
2227 /**
2228  * xmlNewInputFromFile:
2229  * @ctxt:  an XML parser context
2230  * @filename:  the filename to use as entity
2231  *
2232  * Create a new input stream based on a file or an URL.
2233  *
2234  * Returns the new input stream or NULL in case of error
2235  */
2236 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)2237 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
2238     xmlParserInputPtr input;
2239     int flags = 0;
2240     int code;
2241 
2242     if ((ctxt == NULL) || (filename == NULL))
2243         return(NULL);
2244 
2245     if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2246         flags |= XML_INPUT_UNZIP;
2247     if ((ctxt->options & XML_PARSE_NONET) == 0)
2248         flags |= XML_INPUT_NETWORK;
2249 
2250     code = xmlInputCreateUrl(filename, flags, &input);
2251     if (code != XML_ERR_OK) {
2252         xmlCtxtErrIO(ctxt, code, filename);
2253         return(NULL);
2254     }
2255 
2256     input = xmlCheckHTTPInput(ctxt, input);
2257 
2258     return(input);
2259 }
2260 
2261 /**
2262  * xmlDefaultExternalEntityLoader:
2263  * @URL:  the URL for the entity to load
2264  * @ID:  the System ID for the entity to load
2265  * @ctxt:  the context in which the entity is called or NULL
2266  *
2267  * By default we don't load external entities, yet.
2268  *
2269  * Returns a new allocated xmlParserInputPtr, or NULL.
2270  */
2271 static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char * url,const char * ID,xmlParserCtxtPtr ctxt)2272 xmlDefaultExternalEntityLoader(const char *url, const char *ID,
2273                                xmlParserCtxtPtr ctxt)
2274 {
2275     xmlParserInputPtr input = NULL;
2276     char *resource = NULL;
2277 
2278     (void) ID;
2279 
2280     if (url == NULL)
2281         return(NULL);
2282 
2283 #ifdef LIBXML_CATALOG_ENABLED
2284     resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
2285     if (resource != NULL)
2286 	url = resource;
2287 #endif
2288 
2289     if ((ctxt != NULL) &&
2290         (ctxt->options & XML_PARSE_NONET) &&
2291         (xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
2292         xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
2293     } else {
2294         input = xmlNewInputFromFile(ctxt, url);
2295     }
2296 
2297     if (resource != NULL)
2298 	xmlFree(resource);
2299     return(input);
2300 }
2301 
2302 /**
2303  * xmlNoNetExternalEntityLoader:
2304  * @URL:  the URL for the entity to load
2305  * @ID:  the System ID for the entity to load
2306  * @ctxt:  the context in which the entity is called or NULL
2307  *
2308  * DEPRECATED: Use XML_PARSE_NONET.
2309  *
2310  * A specific entity loader disabling network accesses, though still
2311  * allowing local catalog accesses for resolution.
2312  *
2313  * Returns a new allocated xmlParserInputPtr, or NULL.
2314  */
2315 xmlParserInputPtr
xmlNoNetExternalEntityLoader(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2316 xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
2317                              xmlParserCtxtPtr ctxt) {
2318     int oldOptions = 0;
2319     xmlParserInputPtr input;
2320 
2321     if (ctxt != NULL) {
2322         oldOptions = ctxt->options;
2323         ctxt->options |= XML_PARSE_NONET;
2324     }
2325 
2326     input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
2327 
2328     if (ctxt != NULL)
2329         ctxt->options = oldOptions;
2330 
2331     return(input);
2332 }
2333 
2334 /*
2335  * This global has to die eventually
2336  */
2337 static xmlExternalEntityLoader
2338 xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
2339 
2340 /**
2341  * xmlSetExternalEntityLoader:
2342  * @f:  the new entity resolver function
2343  *
2344  * DEPRECATED: This is a global setting and not thread-safe. Use
2345  * xmlCtxtSetResourceLoader or similar functions.
2346  *
2347  * Changes the default external entity resolver function for the
2348  * application.
2349  */
2350 void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f)2351 xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
2352     xmlCurrentExternalEntityLoader = f;
2353 }
2354 
2355 /**
2356  * xmlGetExternalEntityLoader:
2357  *
2358  * DEPRECATED: See xmlSetExternalEntityLoader.
2359  *
2360  * Get the default external entity resolver function for the application
2361  *
2362  * Returns the xmlExternalEntityLoader function pointer
2363  */
2364 xmlExternalEntityLoader
xmlGetExternalEntityLoader(void)2365 xmlGetExternalEntityLoader(void) {
2366     return(xmlCurrentExternalEntityLoader);
2367 }
2368 
2369 /**
2370  * xmlCtxtSetResourceLoader:
2371  * @ctxt:  parser context
2372  * @loader:  callback
2373  * @vctxt:  user data
2374  *
2375  * Installs a custom callback to load documents, DTDs or external
2376  * entities.
2377  *
2378  * Available since 2.14.0.
2379  */
2380 void
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt,xmlResourceLoader loader,void * vctxt)2381 xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
2382                          void *vctxt) {
2383     if (ctxt == NULL)
2384         return;
2385 
2386     ctxt->resourceLoader = loader;
2387     ctxt->resourceCtxt = vctxt;
2388 }
2389 
2390 /**
2391  * xmlLoadResource:
2392  * @ctxt:  parser context
2393  * @url:  the URL for the entity to load
2394  * @publicId:  the Public ID for the entity to load
2395  * @type:  resource type
2396  *
2397  * Returns the xmlParserInputPtr or NULL in case of error.
2398  */
2399 xmlParserInputPtr
xmlLoadResource(xmlParserCtxtPtr ctxt,const char * url,const char * publicId,xmlResourceType type)2400 xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
2401                 xmlResourceType type) {
2402     char *canonicFilename;
2403     xmlParserInputPtr ret;
2404 
2405     if (url == NULL)
2406         return(NULL);
2407 
2408     if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
2409         int flags = 0;
2410         int code;
2411 
2412         if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
2413             flags |= XML_INPUT_UNZIP;
2414         if ((ctxt->options & XML_PARSE_NONET) == 0)
2415             flags |= XML_INPUT_NETWORK;
2416 
2417         code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, flags,
2418                                     type, &ret);
2419         if (code != XML_ERR_OK) {
2420             xmlCtxtErrIO(ctxt, code, url);
2421             return(NULL);
2422         }
2423         return(ret);
2424     }
2425 
2426     canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
2427     if (canonicFilename == NULL) {
2428         xmlCtxtErrMemory(ctxt);
2429         return(NULL);
2430     }
2431 
2432     ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
2433     xmlFree(canonicFilename);
2434     return(ret);
2435 }
2436 
2437 /**
2438  * xmlLoadExternalEntity:
2439  * @URL:  the URL for the entity to load
2440  * @ID:  the Public ID for the entity to load
2441  * @ctxt:  the context in which the entity is called or NULL
2442  *
2443  * @URL is a filename or URL. If if contains the substring "://",
2444  * it is assumed to be a Legacy Extended IRI. Otherwise, it is
2445  * treated as a filesystem path.
2446  *
2447  * @ID is an optional XML public ID, typically from a doctype
2448  * declaration. It is used for catalog lookups.
2449  *
2450  * The following resource loaders will be called if they were
2451  * registered (in order of precedence):
2452  *
2453  * - the resource loader set with xmlCtxtSetResourceLoader
2454  * - the global external entity loader set with
2455  *   xmlSetExternalEntityLoader
2456  * - the per-thread xmlParserInputBufferCreateFilenameFunc set with
2457  *   xmlParserInputBufferCreateFilenameDefault
2458  * - the default loader which will return
2459  *   - the result from a matching global input callback set with
2460  *     xmlRegisterInputCallbacks
2461  *   - a HTTP resource if support is compiled in.
2462  *   - a file opened from the filesystem, with automatic detection
2463  *     of compressed files if support is compiled in.
2464  *
2465  * Returns the xmlParserInputPtr or NULL
2466  */
2467 xmlParserInputPtr
xmlLoadExternalEntity(const char * URL,const char * ID,xmlParserCtxtPtr ctxt)2468 xmlLoadExternalEntity(const char *URL, const char *ID,
2469                       xmlParserCtxtPtr ctxt) {
2470     return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
2471 }
2472 
2473 /************************************************************************
2474  *									*
2475  *		Commodity functions to handle parser contexts		*
2476  *									*
2477  ************************************************************************/
2478 
2479 /**
2480  * xmlInitSAXParserCtxt:
2481  * @ctxt:  XML parser context
2482  * @sax:  SAX handlert
2483  * @userData:  user data
2484  *
2485  * Initialize a SAX parser context
2486  *
2487  * Returns 0 in case of success and -1 in case of error
2488  */
2489 
2490 static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt,const xmlSAXHandler * sax,void * userData)2491 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
2492                      void *userData)
2493 {
2494     xmlParserInputPtr input;
2495 
2496     if (ctxt == NULL)
2497         return(-1);
2498 
2499     if (ctxt->dict == NULL)
2500 	ctxt->dict = xmlDictCreate();
2501     if (ctxt->dict == NULL)
2502 	return(-1);
2503     xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
2504 
2505     if (ctxt->sax == NULL)
2506 	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
2507     if (ctxt->sax == NULL)
2508 	return(-1);
2509     if (sax == NULL) {
2510 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2511         xmlSAXVersion(ctxt->sax, 2);
2512         ctxt->userData = ctxt;
2513     } else {
2514 	if (sax->initialized == XML_SAX2_MAGIC) {
2515 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
2516         } else {
2517 	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
2518 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
2519         }
2520         ctxt->userData = userData ? userData : ctxt;
2521     }
2522 
2523     ctxt->maxatts = 0;
2524     ctxt->atts = NULL;
2525     /* Allocate the Input stack */
2526     if (ctxt->inputTab == NULL) {
2527 	ctxt->inputTab = (xmlParserInputPtr *)
2528 		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
2529 	ctxt->inputMax = 5;
2530     }
2531     if (ctxt->inputTab == NULL)
2532 	return(-1);
2533     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2534         xmlFreeInputStream(input);
2535     }
2536     ctxt->inputNr = 0;
2537     ctxt->input = NULL;
2538 
2539     ctxt->version = NULL;
2540     ctxt->encoding = NULL;
2541     ctxt->standalone = -1;
2542     ctxt->hasExternalSubset = 0;
2543     ctxt->hasPErefs = 0;
2544     ctxt->html = 0;
2545     ctxt->instate = XML_PARSER_START;
2546 
2547     /* Allocate the Node stack */
2548     if (ctxt->nodeTab == NULL) {
2549 	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
2550 	ctxt->nodeMax = 10;
2551     }
2552     if (ctxt->nodeTab == NULL)
2553 	return(-1);
2554     ctxt->nodeNr = 0;
2555     ctxt->node = NULL;
2556 
2557     /* Allocate the Name stack */
2558     if (ctxt->nameTab == NULL) {
2559 	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
2560 	ctxt->nameMax = 10;
2561     }
2562     if (ctxt->nameTab == NULL)
2563 	return(-1);
2564     ctxt->nameNr = 0;
2565     ctxt->name = NULL;
2566 
2567     /* Allocate the space stack */
2568     if (ctxt->spaceTab == NULL) {
2569 	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
2570 	ctxt->spaceMax = 10;
2571     }
2572     if (ctxt->spaceTab == NULL)
2573 	return(-1);
2574     ctxt->spaceNr = 1;
2575     ctxt->spaceMax = 10;
2576     ctxt->spaceTab[0] = -1;
2577     ctxt->space = &ctxt->spaceTab[0];
2578     ctxt->myDoc = NULL;
2579     ctxt->wellFormed = 1;
2580     ctxt->nsWellFormed = 1;
2581     ctxt->valid = 1;
2582 
2583     ctxt->options = XML_PARSE_NODICT;
2584 
2585     /*
2586      * Initialize some parser options from deprecated global variables.
2587      * Note that the "modern" API taking options arguments or
2588      * xmlCtxtSetOptions will ignore these defaults. They're only
2589      * relevant if old API functions like xmlParseFile are used.
2590      */
2591     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
2592     if (ctxt->loadsubset) {
2593         ctxt->options |= XML_PARSE_DTDLOAD;
2594     }
2595     ctxt->validate = xmlDoValidityCheckingDefaultValue;
2596     if (ctxt->validate) {
2597         ctxt->options |= XML_PARSE_DTDVALID;
2598     }
2599     ctxt->pedantic = xmlPedanticParserDefaultValue;
2600     if (ctxt->pedantic) {
2601         ctxt->options |= XML_PARSE_PEDANTIC;
2602     }
2603     ctxt->linenumbers = xmlLineNumbersDefaultValue;
2604     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
2605     if (ctxt->keepBlanks == 0) {
2606 	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
2607 	ctxt->options |= XML_PARSE_NOBLANKS;
2608     }
2609     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
2610     if (ctxt->replaceEntities) {
2611         ctxt->options |= XML_PARSE_NOENT;
2612     }
2613     if (xmlGetWarningsDefaultValue == 0)
2614         ctxt->options |= XML_PARSE_NOWARNING;
2615 
2616     ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
2617     ctxt->vctxt.userData = ctxt;
2618     ctxt->vctxt.error = xmlParserValidityError;
2619     ctxt->vctxt.warning = xmlParserValidityWarning;
2620 
2621     ctxt->record_info = 0;
2622     ctxt->checkIndex = 0;
2623     ctxt->inSubset = 0;
2624     ctxt->errNo = XML_ERR_OK;
2625     ctxt->depth = 0;
2626     ctxt->catalogs = NULL;
2627     ctxt->sizeentities = 0;
2628     ctxt->sizeentcopy = 0;
2629     ctxt->input_id = 1;
2630     ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
2631     xmlInitNodeInfoSeq(&ctxt->node_seq);
2632 
2633     if (ctxt->nsdb == NULL) {
2634         ctxt->nsdb = xmlParserNsCreate();
2635         if (ctxt->nsdb == NULL)
2636             return(-1);
2637     }
2638 
2639     return(0);
2640 }
2641 
2642 /**
2643  * xmlInitParserCtxt:
2644  * @ctxt:  an XML parser context
2645  *
2646  * DEPRECATED: Internal function which will be made private in a future
2647  * version.
2648  *
2649  * Initialize a parser context
2650  *
2651  * Returns 0 in case of success and -1 in case of error
2652  */
2653 
2654 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)2655 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
2656 {
2657     return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
2658 }
2659 
2660 /**
2661  * xmlFreeParserCtxt:
2662  * @ctxt:  an XML parser context
2663  *
2664  * Free all the memory used by a parser context. However the parsed
2665  * document in ctxt->myDoc is not freed.
2666  */
2667 
2668 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)2669 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2670 {
2671     xmlParserInputPtr input;
2672 
2673     if (ctxt == NULL) return;
2674 
2675     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2676         xmlFreeInputStream(input);
2677     }
2678     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2679     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2680     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2681     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2682     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2683     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2684     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2685     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2686     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2687 #ifdef LIBXML_SAX1_ENABLED
2688     if ((ctxt->sax != NULL) &&
2689         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2690 #else
2691     if (ctxt->sax != NULL)
2692 #endif /* LIBXML_SAX1_ENABLED */
2693         xmlFree(ctxt->sax);
2694     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2695     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2696     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2697     if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2698     if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2699     if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2700     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2701     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2702     if (ctxt->attsDefault != NULL)
2703         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2704     if (ctxt->attsSpecial != NULL)
2705         xmlHashFree(ctxt->attsSpecial, NULL);
2706     if (ctxt->freeElems != NULL) {
2707         xmlNodePtr cur, next;
2708 
2709 	cur = ctxt->freeElems;
2710 	while (cur != NULL) {
2711 	    next = cur->next;
2712 	    xmlFree(cur);
2713 	    cur = next;
2714 	}
2715     }
2716     if (ctxt->freeAttrs != NULL) {
2717         xmlAttrPtr cur, next;
2718 
2719 	cur = ctxt->freeAttrs;
2720 	while (cur != NULL) {
2721 	    next = cur->next;
2722 	    xmlFree(cur);
2723 	    cur = next;
2724 	}
2725     }
2726     /*
2727      * cleanup the error strings
2728      */
2729     if (ctxt->lastError.message != NULL)
2730         xmlFree(ctxt->lastError.message);
2731     if (ctxt->lastError.file != NULL)
2732         xmlFree(ctxt->lastError.file);
2733     if (ctxt->lastError.str1 != NULL)
2734         xmlFree(ctxt->lastError.str1);
2735     if (ctxt->lastError.str2 != NULL)
2736         xmlFree(ctxt->lastError.str2);
2737     if (ctxt->lastError.str3 != NULL)
2738         xmlFree(ctxt->lastError.str3);
2739 
2740 #ifdef LIBXML_CATALOG_ENABLED
2741     if (ctxt->catalogs != NULL)
2742 	xmlCatalogFreeLocal(ctxt->catalogs);
2743 #endif
2744     xmlFree(ctxt);
2745 }
2746 
2747 /**
2748  * xmlNewParserCtxt:
2749  *
2750  * Allocate and initialize a new parser context.
2751  *
2752  * Returns the xmlParserCtxtPtr or NULL
2753  */
2754 
2755 xmlParserCtxtPtr
xmlNewParserCtxt(void)2756 xmlNewParserCtxt(void)
2757 {
2758     return(xmlNewSAXParserCtxt(NULL, NULL));
2759 }
2760 
2761 /**
2762  * xmlNewSAXParserCtxt:
2763  * @sax:  SAX handler
2764  * @userData:  user data
2765  *
2766  * Allocate and initialize a new SAX parser context. If userData is NULL,
2767  * the parser context will be passed as user data.
2768  *
2769  * Available since 2.11.0. If you want support older versions,
2770  * it's best to invoke xmlNewParserCtxt and set ctxt->sax with
2771  * struct assignment.
2772  *
2773  * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2774  */
2775 
2776 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)2777 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2778 {
2779     xmlParserCtxtPtr ctxt;
2780 
2781     xmlInitParser();
2782 
2783     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2784     if (ctxt == NULL)
2785 	return(NULL);
2786     memset(ctxt, 0, sizeof(xmlParserCtxt));
2787     if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2788         xmlFreeParserCtxt(ctxt);
2789 	return(NULL);
2790     }
2791     return(ctxt);
2792 }
2793 
2794 /************************************************************************
2795  *									*
2796  *		Handling of node information				*
2797  *									*
2798  ************************************************************************/
2799 
2800 /**
2801  * xmlClearParserCtxt:
2802  * @ctxt:  an XML parser context
2803  *
2804  * Clear (release owned resources) and reinitialize a parser context
2805  */
2806 
2807 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)2808 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2809 {
2810   if (ctxt==NULL)
2811     return;
2812   xmlClearNodeInfoSeq(&ctxt->node_seq);
2813   xmlCtxtReset(ctxt);
2814 }
2815 
2816 
2817 /**
2818  * xmlParserFindNodeInfo:
2819  * @ctx:  an XML parser context
2820  * @node:  an XML node within the tree
2821  *
2822  * DEPRECATED: Don't use.
2823  *
2824  * Find the parser node info struct for a given node
2825  *
2826  * Returns an xmlParserNodeInfo block pointer or NULL
2827  */
2828 const xmlParserNodeInfo *
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx,xmlNodePtr node)2829 xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
2830 {
2831     unsigned long pos;
2832 
2833     if ((ctx == NULL) || (node == NULL))
2834         return (NULL);
2835     /* Find position where node should be at */
2836     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2837     if (pos < ctx->node_seq.length
2838         && ctx->node_seq.buffer[pos].node == node)
2839         return &ctx->node_seq.buffer[pos];
2840     else
2841         return NULL;
2842 }
2843 
2844 
2845 /**
2846  * xmlInitNodeInfoSeq:
2847  * @seq:  a node info sequence pointer
2848  *
2849  * DEPRECATED: Don't use.
2850  *
2851  * -- Initialize (set to initial state) node info sequence
2852  */
2853 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2854 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2855 {
2856     if (seq == NULL)
2857         return;
2858     seq->length = 0;
2859     seq->maximum = 0;
2860     seq->buffer = NULL;
2861 }
2862 
2863 /**
2864  * xmlClearNodeInfoSeq:
2865  * @seq:  a node info sequence pointer
2866  *
2867  * DEPRECATED: Don't use.
2868  *
2869  * -- Clear (release memory and reinitialize) node
2870  *   info sequence
2871  */
2872 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2873 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2874 {
2875     if (seq == NULL)
2876         return;
2877     if (seq->buffer != NULL)
2878         xmlFree(seq->buffer);
2879     xmlInitNodeInfoSeq(seq);
2880 }
2881 
2882 /**
2883  * xmlParserFindNodeInfoIndex:
2884  * @seq:  a node info sequence pointer
2885  * @node:  an XML node pointer
2886  *
2887  * DEPRECATED: Don't use.
2888  *
2889  * xmlParserFindNodeInfoIndex : Find the index that the info record for
2890  *   the given node is or should be at in a sorted sequence
2891  *
2892  * Returns a long indicating the position of the record
2893  */
2894 unsigned long
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,xmlNodePtr node)2895 xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
2896                            xmlNodePtr node)
2897 {
2898     unsigned long upper, lower, middle;
2899     int found = 0;
2900 
2901     if ((seq == NULL) || (node == NULL))
2902         return ((unsigned long) -1);
2903 
2904     /* Do a binary search for the key */
2905     lower = 1;
2906     upper = seq->length;
2907     middle = 0;
2908     while (lower <= upper && !found) {
2909         middle = lower + (upper - lower) / 2;
2910         if (node == seq->buffer[middle - 1].node)
2911             found = 1;
2912         else if (node < seq->buffer[middle - 1].node)
2913             upper = middle - 1;
2914         else
2915             lower = middle + 1;
2916     }
2917 
2918     /* Return position */
2919     if (middle == 0 || seq->buffer[middle - 1].node < node)
2920         return middle;
2921     else
2922         return middle - 1;
2923 }
2924 
2925 
2926 /**
2927  * xmlParserAddNodeInfo:
2928  * @ctxt:  an XML parser context
2929  * @info:  a node info sequence pointer
2930  *
2931  * DEPRECATED: Don't use.
2932  *
2933  * Insert node info record into the sorted sequence
2934  */
2935 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,xmlParserNodeInfoPtr info)2936 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2937                      xmlParserNodeInfoPtr info)
2938 {
2939     unsigned long pos;
2940 
2941     if ((ctxt == NULL) || (info == NULL)) return;
2942 
2943     /* Find pos and check to see if node is already in the sequence */
2944     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2945                                      info->node);
2946 
2947     if ((pos < ctxt->node_seq.length) &&
2948         (ctxt->node_seq.buffer != NULL) &&
2949         (ctxt->node_seq.buffer[pos].node == info->node)) {
2950         ctxt->node_seq.buffer[pos] = *info;
2951     }
2952 
2953     /* Otherwise, we need to add new node to buffer */
2954     else {
2955         if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2956 	    (ctxt->node_seq.buffer == NULL)) {
2957             xmlParserNodeInfo *tmp_buffer;
2958             unsigned int byte_size;
2959 
2960             if (ctxt->node_seq.maximum == 0)
2961                 ctxt->node_seq.maximum = 2;
2962             byte_size = (sizeof(*ctxt->node_seq.buffer) *
2963 			(2 * ctxt->node_seq.maximum));
2964 
2965             if (ctxt->node_seq.buffer == NULL)
2966                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2967             else
2968                 tmp_buffer =
2969                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2970                                                      byte_size);
2971 
2972             if (tmp_buffer == NULL) {
2973 		xmlCtxtErrMemory(ctxt);
2974                 return;
2975             }
2976             ctxt->node_seq.buffer = tmp_buffer;
2977             ctxt->node_seq.maximum *= 2;
2978         }
2979 
2980         /* If position is not at end, move elements out of the way */
2981         if (pos != ctxt->node_seq.length) {
2982             unsigned long i;
2983 
2984             for (i = ctxt->node_seq.length; i > pos; i--)
2985                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2986         }
2987 
2988         /* Copy element and increase length */
2989         ctxt->node_seq.buffer[pos] = *info;
2990         ctxt->node_seq.length++;
2991     }
2992 }
2993 
2994 /************************************************************************
2995  *									*
2996  *		Defaults settings					*
2997  *									*
2998  ************************************************************************/
2999 /**
3000  * xmlPedanticParserDefault:
3001  * @val:  int 0 or 1
3002  *
3003  * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
3004  *
3005  * Set and return the previous value for enabling pedantic warnings.
3006  *
3007  * Returns the last value for 0 for no substitution, 1 for substitution.
3008  */
3009 
3010 int
xmlPedanticParserDefault(int val)3011 xmlPedanticParserDefault(int val) {
3012     int old = xmlPedanticParserDefaultValue;
3013 
3014     xmlPedanticParserDefaultValue = val;
3015     return(old);
3016 }
3017 
3018 /**
3019  * xmlLineNumbersDefault:
3020  * @val:  int 0 or 1
3021  *
3022  * DEPRECATED: The modern options API always enables line numbers.
3023  *
3024  * Set and return the previous value for enabling line numbers in elements
3025  * contents. This may break on old application and is turned off by default.
3026  *
3027  * Returns the last value for 0 for no substitution, 1 for substitution.
3028  */
3029 
3030 int
xmlLineNumbersDefault(int val)3031 xmlLineNumbersDefault(int val) {
3032     int old = xmlLineNumbersDefaultValue;
3033 
3034     xmlLineNumbersDefaultValue = val;
3035     return(old);
3036 }
3037 
3038 /**
3039  * xmlSubstituteEntitiesDefault:
3040  * @val:  int 0 or 1
3041  *
3042  * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
3043  *
3044  * Set and return the previous value for default entity support.
3045  * Initially the parser always keep entity references instead of substituting
3046  * entity values in the output. This function has to be used to change the
3047  * default parser behavior
3048  * SAX::substituteEntities() has to be used for changing that on a file by
3049  * file basis.
3050  *
3051  * Returns the last value for 0 for no substitution, 1 for substitution.
3052  */
3053 
3054 int
xmlSubstituteEntitiesDefault(int val)3055 xmlSubstituteEntitiesDefault(int val) {
3056     int old = xmlSubstituteEntitiesDefaultValue;
3057 
3058     xmlSubstituteEntitiesDefaultValue = val;
3059     return(old);
3060 }
3061 
3062 /**
3063  * xmlKeepBlanksDefault:
3064  * @val:  int 0 or 1
3065  *
3066  * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
3067  *
3068  * Set and return the previous value for default blanks text nodes support.
3069  * The 1.x version of the parser used an heuristic to try to detect
3070  * ignorable white spaces. As a result the SAX callback was generating
3071  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
3072  * using the DOM output text nodes containing those blanks were not generated.
3073  * The 2.x and later version will switch to the XML standard way and
3074  * ignorableWhitespace() are only generated when running the parser in
3075  * validating mode and when the current element doesn't allow CDATA or
3076  * mixed content.
3077  * This function is provided as a way to force the standard behavior
3078  * on 1.X libs and to switch back to the old mode for compatibility when
3079  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
3080  * by using xmlIsBlankNode() commodity function to detect the "empty"
3081  * nodes generated.
3082  * This value also affect autogeneration of indentation when saving code
3083  * if blanks sections are kept, indentation is not generated.
3084  *
3085  * Returns the last value for 0 for no substitution, 1 for substitution.
3086  */
3087 
3088 int
xmlKeepBlanksDefault(int val)3089 xmlKeepBlanksDefault(int val) {
3090     int old = xmlKeepBlanksDefaultValue;
3091 
3092     xmlKeepBlanksDefaultValue = val;
3093 #ifdef LIBXML_OUTPUT_ENABLED
3094     if (!val)
3095         xmlIndentTreeOutput = 1;
3096 #endif
3097     return(old);
3098 }
3099 
3100