• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * parserInternals.c : Internal routines (and obsolete ones) needed for the
3  *                     XML and HTML parsers.
4  *
5  * See Copyright for the status of this software.
6  *
7  * daniel@veillard.com
8  */
9 
10 #define IN_LIBXML
11 #include "libxml.h"
12 
13 #if defined(_WIN32)
14 #define XML_DIR_SEP '\\'
15 #else
16 #define XML_DIR_SEP '/'
17 #endif
18 
19 #include <string.h>
20 #include <ctype.h>
21 #include <stdlib.h>
22 
23 #include <libxml/xmlmemory.h>
24 #include <libxml/tree.h>
25 #include <libxml/parser.h>
26 #include <libxml/parserInternals.h>
27 #include <libxml/entities.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/encoding.h>
30 #include <libxml/xmlIO.h>
31 #include <libxml/uri.h>
32 #include <libxml/dict.h>
33 #include <libxml/xmlsave.h>
34 #ifdef LIBXML_CATALOG_ENABLED
35 #include <libxml/catalog.h>
36 #endif
37 #include <libxml/chvalid.h>
38 
39 #define CUR(ctxt) ctxt->input->cur
40 #define END(ctxt) ctxt->input->end
41 
42 #include "private/buf.h"
43 #include "private/enc.h"
44 #include "private/error.h"
45 #include "private/io.h"
46 #include "private/parser.h"
47 
48 /*
49  * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
50  * factor of serialized output after entity expansion.
51  */
52 #define XML_MAX_AMPLIFICATION_DEFAULT 5
53 
54 /*
55  * Various global defaults for parsing
56  */
57 
58 /**
59  * xmlCheckVersion:
60  * @version: the include version number
61  *
62  * check the compiled lib version against the include one.
63  * This can warn or immediately kill the application
64  */
65 void
xmlCheckVersion(int version)66 xmlCheckVersion(int version) {
67     int myversion = LIBXML_VERSION;
68 
69     xmlInitParser();
70 
71     if ((myversion / 10000) != (version / 10000)) {
72 	xmlGenericError(xmlGenericErrorContext,
73 		"Fatal: program compiled against libxml %d using libxml %d\n",
74 		(version / 10000), (myversion / 10000));
75 	fprintf(stderr,
76 		"Fatal: program compiled against libxml %d using libxml %d\n",
77 		(version / 10000), (myversion / 10000));
78     }
79     if ((myversion / 100) < (version / 100)) {
80 	xmlGenericError(xmlGenericErrorContext,
81 		"Warning: program compiled against libxml %d using older %d\n",
82 		(version / 100), (myversion / 100));
83     }
84 }
85 
86 
87 /************************************************************************
88  *									*
89  *		Some factorized error routines				*
90  *									*
91  ************************************************************************/
92 
93 
94 /**
95  * xmlErrMemory:
96  * @ctxt:  an XML parser context
97  * @extra:  extra information
98  *
99  * Handle a redefinition of attribute error
100  */
101 void
xmlErrMemory(xmlParserCtxtPtr ctxt,const char * extra)102 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
103 {
104     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
105         (ctxt->instate == XML_PARSER_EOF))
106 	return;
107     if (ctxt != NULL) {
108         ctxt->errNo = XML_ERR_NO_MEMORY;
109         ctxt->instate = XML_PARSER_EOF;
110         ctxt->disableSAX = 1;
111     }
112     if (extra)
113         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
114                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
115                         NULL, NULL, 0, 0,
116                         "Memory allocation failed : %s\n", extra);
117     else
118         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
119                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
120                         NULL, NULL, 0, 0, "Memory allocation failed\n");
121 }
122 
123 /**
124  * __xmlErrEncoding:
125  * @ctxt:  an XML parser context
126  * @xmlerr:  the error number
127  * @msg:  the error message
128  * @str1:  an string info
129  * @str2:  an string info
130  *
131  * Handle an encoding error
132  */
133 void
__xmlErrEncoding(xmlParserCtxtPtr ctxt,xmlParserErrors xmlerr,const char * msg,const xmlChar * str1,const xmlChar * str2)134 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
135                  const char *msg, const xmlChar * str1, const xmlChar * str2)
136 {
137     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
138         (ctxt->instate == XML_PARSER_EOF))
139 	return;
140     if (ctxt != NULL)
141         ctxt->errNo = xmlerr;
142     __xmlRaiseError(NULL, NULL, NULL,
143                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
144                     NULL, 0, (const char *) str1, (const char *) str2,
145                     NULL, 0, 0, msg, str1, str2);
146     if (ctxt != NULL) {
147         ctxt->wellFormed = 0;
148         if (ctxt->recovery == 0)
149             ctxt->disableSAX = 1;
150     }
151 }
152 
153 /**
154  * xmlErrInternal:
155  * @ctxt:  an XML parser context
156  * @msg:  the error message
157  * @str:  error information
158  *
159  * Handle an internal error
160  */
161 static void LIBXML_ATTR_FORMAT(2,0)
xmlErrInternal(xmlParserCtxtPtr ctxt,const char * msg,const xmlChar * str)162 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
163 {
164     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
165         (ctxt->instate == XML_PARSER_EOF))
166 	return;
167     if (ctxt != NULL)
168         ctxt->errNo = XML_ERR_INTERNAL_ERROR;
169     __xmlRaiseError(NULL, NULL, NULL,
170                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
171                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
172                     0, 0, msg, str);
173     if (ctxt != NULL) {
174         ctxt->wellFormed = 0;
175         if (ctxt->recovery == 0)
176             ctxt->disableSAX = 1;
177     }
178 }
179 
180 /**
181  * xmlFatalErr:
182  * @ctxt:  an XML parser context
183  * @error:  the error number
184  * @info:  extra information string
185  *
186  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
187  */
188 void
xmlFatalErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * info)189 xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
190 {
191     const char *errmsg;
192 
193     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
194         (ctxt->instate == XML_PARSER_EOF))
195 	return;
196     switch (error) {
197         case XML_ERR_INVALID_HEX_CHARREF:
198             errmsg = "CharRef: invalid hexadecimal value";
199             break;
200         case XML_ERR_INVALID_DEC_CHARREF:
201             errmsg = "CharRef: invalid decimal value";
202             break;
203         case XML_ERR_INVALID_CHARREF:
204             errmsg = "CharRef: invalid value";
205             break;
206         case XML_ERR_INTERNAL_ERROR:
207             errmsg = "internal error";
208             break;
209         case XML_ERR_PEREF_AT_EOF:
210             errmsg = "PEReference at end of document";
211             break;
212         case XML_ERR_PEREF_IN_PROLOG:
213             errmsg = "PEReference in prolog";
214             break;
215         case XML_ERR_PEREF_IN_EPILOG:
216             errmsg = "PEReference in epilog";
217             break;
218         case XML_ERR_PEREF_NO_NAME:
219             errmsg = "PEReference: no name";
220             break;
221         case XML_ERR_PEREF_SEMICOL_MISSING:
222             errmsg = "PEReference: expecting ';'";
223             break;
224         case XML_ERR_ENTITY_LOOP:
225             errmsg = "Detected an entity reference loop";
226             break;
227         case XML_ERR_ENTITY_NOT_STARTED:
228             errmsg = "EntityValue: \" or ' expected";
229             break;
230         case XML_ERR_ENTITY_PE_INTERNAL:
231             errmsg = "PEReferences forbidden in internal subset";
232             break;
233         case XML_ERR_ENTITY_NOT_FINISHED:
234             errmsg = "EntityValue: \" or ' expected";
235             break;
236         case XML_ERR_ATTRIBUTE_NOT_STARTED:
237             errmsg = "AttValue: \" or ' expected";
238             break;
239         case XML_ERR_LT_IN_ATTRIBUTE:
240             errmsg = "Unescaped '<' not allowed in attributes values";
241             break;
242         case XML_ERR_LITERAL_NOT_STARTED:
243             errmsg = "SystemLiteral \" or ' expected";
244             break;
245         case XML_ERR_LITERAL_NOT_FINISHED:
246             errmsg = "Unfinished System or Public ID \" or ' expected";
247             break;
248         case XML_ERR_MISPLACED_CDATA_END:
249             errmsg = "Sequence ']]>' not allowed in content";
250             break;
251         case XML_ERR_URI_REQUIRED:
252             errmsg = "SYSTEM or PUBLIC, the URI is missing";
253             break;
254         case XML_ERR_PUBID_REQUIRED:
255             errmsg = "PUBLIC, the Public Identifier is missing";
256             break;
257         case XML_ERR_HYPHEN_IN_COMMENT:
258             errmsg = "Comment must not contain '--' (double-hyphen)";
259             break;
260         case XML_ERR_PI_NOT_STARTED:
261             errmsg = "xmlParsePI : no target name";
262             break;
263         case XML_ERR_RESERVED_XML_NAME:
264             errmsg = "Invalid PI name";
265             break;
266         case XML_ERR_NOTATION_NOT_STARTED:
267             errmsg = "NOTATION: Name expected here";
268             break;
269         case XML_ERR_NOTATION_NOT_FINISHED:
270             errmsg = "'>' required to close NOTATION declaration";
271             break;
272         case XML_ERR_VALUE_REQUIRED:
273             errmsg = "Entity value required";
274             break;
275         case XML_ERR_URI_FRAGMENT:
276             errmsg = "Fragment not allowed";
277             break;
278         case XML_ERR_ATTLIST_NOT_STARTED:
279             errmsg = "'(' required to start ATTLIST enumeration";
280             break;
281         case XML_ERR_NMTOKEN_REQUIRED:
282             errmsg = "NmToken expected in ATTLIST enumeration";
283             break;
284         case XML_ERR_ATTLIST_NOT_FINISHED:
285             errmsg = "')' required to finish ATTLIST enumeration";
286             break;
287         case XML_ERR_MIXED_NOT_STARTED:
288             errmsg = "MixedContentDecl : '|' or ')*' expected";
289             break;
290         case XML_ERR_PCDATA_REQUIRED:
291             errmsg = "MixedContentDecl : '#PCDATA' expected";
292             break;
293         case XML_ERR_ELEMCONTENT_NOT_STARTED:
294             errmsg = "ContentDecl : Name or '(' expected";
295             break;
296         case XML_ERR_ELEMCONTENT_NOT_FINISHED:
297             errmsg = "ContentDecl : ',' '|' or ')' expected";
298             break;
299         case XML_ERR_PEREF_IN_INT_SUBSET:
300             errmsg =
301                 "PEReference: forbidden within markup decl in internal subset";
302             break;
303         case XML_ERR_GT_REQUIRED:
304             errmsg = "expected '>'";
305             break;
306         case XML_ERR_CONDSEC_INVALID:
307             errmsg = "XML conditional section '[' expected";
308             break;
309         case XML_ERR_EXT_SUBSET_NOT_FINISHED:
310             errmsg = "Content error in the external subset";
311             break;
312         case XML_ERR_CONDSEC_INVALID_KEYWORD:
313             errmsg =
314                 "conditional section INCLUDE or IGNORE keyword expected";
315             break;
316         case XML_ERR_CONDSEC_NOT_FINISHED:
317             errmsg = "XML conditional section not closed";
318             break;
319         case XML_ERR_XMLDECL_NOT_STARTED:
320             errmsg = "Text declaration '<?xml' required";
321             break;
322         case XML_ERR_XMLDECL_NOT_FINISHED:
323             errmsg = "parsing XML declaration: '?>' expected";
324             break;
325         case XML_ERR_EXT_ENTITY_STANDALONE:
326             errmsg = "external parsed entities cannot be standalone";
327             break;
328         case XML_ERR_ENTITYREF_SEMICOL_MISSING:
329             errmsg = "EntityRef: expecting ';'";
330             break;
331         case XML_ERR_DOCTYPE_NOT_FINISHED:
332             errmsg = "DOCTYPE improperly terminated";
333             break;
334         case XML_ERR_LTSLASH_REQUIRED:
335             errmsg = "EndTag: '</' not found";
336             break;
337         case XML_ERR_EQUAL_REQUIRED:
338             errmsg = "expected '='";
339             break;
340         case XML_ERR_STRING_NOT_CLOSED:
341             errmsg = "String not closed expecting \" or '";
342             break;
343         case XML_ERR_STRING_NOT_STARTED:
344             errmsg = "String not started expecting ' or \"";
345             break;
346         case XML_ERR_ENCODING_NAME:
347             errmsg = "Invalid XML encoding name";
348             break;
349         case XML_ERR_STANDALONE_VALUE:
350             errmsg = "standalone accepts only 'yes' or 'no'";
351             break;
352         case XML_ERR_DOCUMENT_EMPTY:
353             errmsg = "Document is empty";
354             break;
355         case XML_ERR_DOCUMENT_END:
356             errmsg = "Extra content at the end of the document";
357             break;
358         case XML_ERR_NOT_WELL_BALANCED:
359             errmsg = "chunk is not well balanced";
360             break;
361         case XML_ERR_EXTRA_CONTENT:
362             errmsg = "extra content at the end of well balanced chunk";
363             break;
364         case XML_ERR_VERSION_MISSING:
365             errmsg = "Malformed declaration expecting version";
366             break;
367         case XML_ERR_NAME_TOO_LONG:
368             errmsg = "Name too long";
369             break;
370         case XML_ERR_INVALID_ENCODING:
371             errmsg = "Invalid bytes in character encoding";
372             break;
373         case XML_IO_UNKNOWN:
374             errmsg = "I/O error";
375             break;
376 #if 0
377         case:
378             errmsg = "";
379             break;
380 #endif
381         default:
382             errmsg = "Unregistered error message";
383     }
384     if (ctxt != NULL)
385 	ctxt->errNo = error;
386     if (info == NULL) {
387         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
388                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
389                         errmsg);
390     } else {
391         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
392                         XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
393                         errmsg, info);
394     }
395     if (ctxt != NULL) {
396 	ctxt->wellFormed = 0;
397 	if (ctxt->recovery == 0)
398 	    ctxt->disableSAX = 1;
399     }
400 }
401 
402 /**
403  * xmlErrEncodingInt:
404  * @ctxt:  an XML parser context
405  * @error:  the error number
406  * @msg:  the error message
407  * @val:  an integer value
408  *
409  * n encoding error
410  */
411 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrEncodingInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)412 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413                   const char *msg, int val)
414 {
415     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
416         (ctxt->instate == XML_PARSER_EOF))
417 	return;
418     if (ctxt != NULL)
419         ctxt->errNo = error;
420     __xmlRaiseError(NULL, NULL, NULL,
421                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
422                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
423     if (ctxt != NULL) {
424         ctxt->wellFormed = 0;
425         if (ctxt->recovery == 0)
426             ctxt->disableSAX = 1;
427     }
428 }
429 
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
443 
444 /************************************************************************
445  *									*
446  *		Input handling functions for progressive parsing	*
447  *									*
448  ************************************************************************/
449 
450 /* we need to keep enough input to show errors in context */
451 #define LINE_LEN        80
452 
453 /**
454  * xmlHaltParser:
455  * @ctxt:  an XML parser context
456  *
457  * Blocks further parser processing don't override error
458  * for internal use
459  */
460 void
xmlHaltParser(xmlParserCtxtPtr ctxt)461 xmlHaltParser(xmlParserCtxtPtr ctxt) {
462     if (ctxt == NULL)
463         return;
464     ctxt->instate = XML_PARSER_EOF;
465     ctxt->disableSAX = 1;
466     while (ctxt->inputNr > 1)
467         xmlFreeInputStream(inputPop(ctxt));
468     if (ctxt->input != NULL) {
469         /*
470 	 * in case there was a specific allocation deallocate before
471 	 * overriding base
472 	 */
473         if (ctxt->input->free != NULL) {
474 	    ctxt->input->free((xmlChar *) ctxt->input->base);
475 	    ctxt->input->free = NULL;
476 	}
477         if (ctxt->input->buf != NULL) {
478             xmlFreeParserInputBuffer(ctxt->input->buf);
479             ctxt->input->buf = NULL;
480         }
481 	ctxt->input->cur = BAD_CAST"";
482         ctxt->input->length = 0;
483 	ctxt->input->base = ctxt->input->cur;
484         ctxt->input->end = ctxt->input->cur;
485     }
486 }
487 
488 /**
489  * xmlParserInputRead:
490  * @in:  an XML parser input
491  * @len:  an indicative size for the lookahead
492  *
493  * DEPRECATED: This function was internal and is deprecated.
494  *
495  * Returns -1 as this is an error to use it.
496  */
497 int
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED,int len ATTRIBUTE_UNUSED)498 xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
499     return(-1);
500 }
501 
502 /**
503  * xmlParserGrow:
504  * @ctxt:  an XML parser context
505  *
506  * Grow the input buffer.
507  *
508  * Returns the number of bytes read or -1 in case of error.
509  */
510 int
xmlParserGrow(xmlParserCtxtPtr ctxt)511 xmlParserGrow(xmlParserCtxtPtr ctxt) {
512     xmlParserInputPtr in = ctxt->input;
513     xmlParserInputBufferPtr buf = in->buf;
514     ptrdiff_t curEnd = in->end - in->cur;
515     ptrdiff_t curBase = in->cur - in->base;
516     int ret;
517 
518     if (buf == NULL)
519         return(0);
520     /* Don't grow push parser buffer. */
521     if ((ctxt->progressive) && (ctxt->inputNr <= 1))
522         return(0);
523     if (buf->error != 0)
524         return(-1);
525 
526     if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
527          (curBase > XML_MAX_LOOKUP_LIMIT)) &&
528         ((ctxt->options & XML_PARSE_HUGE) == 0)) {
529         xmlErrMemory(ctxt, "Huge input lookup");
530         xmlHaltParser(ctxt);
531 	return(-1);
532     }
533 
534     if (curEnd >= INPUT_CHUNK)
535         return(0);
536 
537     ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
538     xmlBufUpdateInput(buf->buffer, in, curBase);
539 
540     if (ret < 0) {
541         xmlFatalErr(ctxt, buf->error, NULL);
542         /* Buffer contents may be lost in case of memory errors. */
543         if (buf->error == XML_ERR_NO_MEMORY)
544             xmlHaltParser(ctxt);
545     }
546 
547     return(ret);
548 }
549 
550 /**
551  * xmlParserInputGrow:
552  * @in:  an XML parser input
553  * @len:  an indicative size for the lookahead
554  *
555  * DEPRECATED: Don't use.
556  *
557  * This function increase the input for the parser. It tries to
558  * preserve pointers to the input buffer, and keep already read data
559  *
560  * Returns the amount of char read, or -1 in case of error, 0 indicate the
561  * end of this entity
562  */
563 int
xmlParserInputGrow(xmlParserInputPtr in,int len)564 xmlParserInputGrow(xmlParserInputPtr in, int len) {
565     int ret;
566     size_t indx;
567 
568     if ((in == NULL) || (len < 0)) return(-1);
569     if (in->buf == NULL) return(-1);
570     if (in->base == NULL) return(-1);
571     if (in->cur == NULL) return(-1);
572     if (in->buf->buffer == NULL) return(-1);
573 
574     indx = in->cur - in->base;
575     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
576         return(0);
577     }
578     ret = xmlParserInputBufferGrow(in->buf, len);
579 
580     in->base = xmlBufContent(in->buf->buffer);
581     if (in->base == NULL) {
582         in->base = BAD_CAST "";
583         in->cur = in->base;
584         in->end = in->base;
585         return(-1);
586     }
587     in->cur = in->base + indx;
588     in->end = xmlBufEnd(in->buf->buffer);
589 
590     return(ret);
591 }
592 
593 /**
594  * xmlParserShrink:
595  * @ctxt:  an XML parser context
596  *
597  * Shrink the input buffer.
598  */
599 void
xmlParserShrink(xmlParserCtxtPtr ctxt)600 xmlParserShrink(xmlParserCtxtPtr ctxt) {
601     xmlParserInputPtr in = ctxt->input;
602     xmlParserInputBufferPtr buf = in->buf;
603     size_t used;
604 
605     if (buf == NULL)
606         return;
607 
608     used = in->cur - in->base;
609     /*
610      * Do not shrink on large buffers whose only a tiny fraction
611      * was consumed
612      */
613     if (used > INPUT_CHUNK) {
614 	size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
615 
616 	if (res > 0) {
617             used -= res;
618             if ((res > ULONG_MAX) ||
619                 (in->consumed > ULONG_MAX - (unsigned long)res))
620                 in->consumed = ULONG_MAX;
621             else
622                 in->consumed += res;
623 	}
624     }
625 
626     xmlBufUpdateInput(buf->buffer, in, used);
627 }
628 
629 /**
630  * xmlParserInputShrink:
631  * @in:  an XML parser input
632  *
633  * DEPRECATED: Don't use.
634  *
635  * This function removes used input for the parser.
636  */
637 void
xmlParserInputShrink(xmlParserInputPtr in)638 xmlParserInputShrink(xmlParserInputPtr in) {
639     size_t used;
640     size_t ret;
641 
642     if (in == NULL) return;
643     if (in->buf == NULL) return;
644     if (in->base == NULL) return;
645     if (in->cur == NULL) return;
646     if (in->buf->buffer == NULL) return;
647 
648     used = in->cur - in->base;
649     /*
650      * Do not shrink on large buffers whose only a tiny fraction
651      * was consumed
652      */
653     if (used > INPUT_CHUNK) {
654 	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
655 	if (ret > 0) {
656             used -= ret;
657             if ((ret > ULONG_MAX) ||
658                 (in->consumed > ULONG_MAX - (unsigned long)ret))
659                 in->consumed = ULONG_MAX;
660             else
661                 in->consumed += ret;
662 	}
663     }
664 
665     if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
666         xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
667     }
668 
669     in->base = xmlBufContent(in->buf->buffer);
670     if (in->base == NULL) {
671         /* TODO: raise error */
672         in->base = BAD_CAST "";
673         in->cur = in->base;
674         in->end = in->base;
675         return;
676     }
677     in->cur = in->base + used;
678     in->end = xmlBufEnd(in->buf->buffer);
679 }
680 
681 /************************************************************************
682  *									*
683  *		UTF8 character input and related functions		*
684  *									*
685  ************************************************************************/
686 
687 /**
688  * xmlNextChar:
689  * @ctxt:  the XML parser context
690  *
691  * DEPRECATED: Internal function, do not use.
692  *
693  * Skip to the next char input char.
694  */
695 
696 void
xmlNextChar(xmlParserCtxtPtr ctxt)697 xmlNextChar(xmlParserCtxtPtr ctxt)
698 {
699     const unsigned char *cur;
700     size_t avail;
701     int c;
702 
703     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
704         (ctxt->input == NULL))
705         return;
706 
707     avail = ctxt->input->end - ctxt->input->cur;
708 
709     if (avail < INPUT_CHUNK) {
710         xmlParserGrow(ctxt);
711         if ((ctxt->instate == XML_PARSER_EOF) ||
712             (ctxt->input->cur >= ctxt->input->end))
713             return;
714         avail = ctxt->input->end - ctxt->input->cur;
715     }
716 
717     cur = ctxt->input->cur;
718     c = *cur;
719 
720     if (c < 0x80) {
721         if (c == '\n') {
722             ctxt->input->cur++;
723             ctxt->input->line++;
724             ctxt->input->col = 1;
725         } else if (c == '\r') {
726             /*
727              *   2.11 End-of-Line Handling
728              *   the literal two-character sequence "#xD#xA" or a standalone
729              *   literal #xD, an XML processor must pass to the application
730              *   the single character #xA.
731              */
732             ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
733             ctxt->input->line++;
734             ctxt->input->col = 1;
735             return;
736         } else {
737             ctxt->input->cur++;
738             ctxt->input->col++;
739         }
740     } else {
741         ctxt->input->col++;
742 
743         if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
744             goto encoding_error;
745 
746         if (c < 0xe0) {
747             /* 2-byte code */
748             if (c < 0xc2)
749                 goto encoding_error;
750             ctxt->input->cur += 2;
751         } else {
752             unsigned int val = (c << 8) | cur[1];
753 
754             if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
755                 goto encoding_error;
756 
757             if (c < 0xf0) {
758                 /* 3-byte code */
759                 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
760                     goto encoding_error;
761                 ctxt->input->cur += 3;
762             } else {
763                 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
764                     goto encoding_error;
765 
766                 /* 4-byte code */
767                 if ((val < 0xf090) || (val >= 0xf490))
768                     goto encoding_error;
769                 ctxt->input->cur += 4;
770             }
771         }
772     }
773 
774     return;
775 
776 encoding_error:
777     /* Only report the first error */
778     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
779         if ((ctxt == NULL) || (ctxt->input == NULL) ||
780             (ctxt->input->end - ctxt->input->cur < 4)) {
781             __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
782                          "Input is not proper UTF-8, indicate encoding !\n",
783                          NULL, NULL);
784         } else {
785             char buffer[150];
786 
787             snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
788                             ctxt->input->cur[0], ctxt->input->cur[1],
789                             ctxt->input->cur[2], ctxt->input->cur[3]);
790             __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
791                          "Input is not proper UTF-8, indicate encoding !\n%s",
792                          BAD_CAST buffer, NULL);
793         }
794         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
795     }
796     ctxt->input->cur++;
797     return;
798 }
799 
800 /**
801  * xmlCurrentChar:
802  * @ctxt:  the XML parser context
803  * @len:  pointer to the length of the char read
804  *
805  * DEPRECATED: Internal function, do not use.
806  *
807  * The current char value, if using UTF-8 this may actually span multiple
808  * bytes in the input buffer. Implement the end of line normalization:
809  * 2.11 End-of-Line Handling
810  * Wherever an external parsed entity or the literal entity value
811  * of an internal parsed entity contains either the literal two-character
812  * sequence "#xD#xA" or a standalone literal #xD, an XML processor
813  * must pass to the application the single character #xA.
814  * This behavior can conveniently be produced by normalizing all
815  * line breaks to #xA on input, before parsing.)
816  *
817  * Returns the current char value and its length
818  */
819 
820 int
xmlCurrentChar(xmlParserCtxtPtr ctxt,int * len)821 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
822     const unsigned char *cur;
823     size_t avail;
824     int c;
825 
826     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
827     if (ctxt->instate == XML_PARSER_EOF)
828 	return(0);
829 
830     avail = ctxt->input->end - ctxt->input->cur;
831 
832     if (avail < INPUT_CHUNK) {
833         xmlParserGrow(ctxt);
834         if (ctxt->instate == XML_PARSER_EOF)
835             return(0);
836         avail = ctxt->input->end - ctxt->input->cur;
837     }
838 
839     cur = ctxt->input->cur;
840     c = *cur;
841 
842     if (c < 0x80) {
843 	/* 1-byte code */
844         if (c < 0x20) {
845             /*
846              *   2.11 End-of-Line Handling
847              *   the literal two-character sequence "#xD#xA" or a standalone
848              *   literal #xD, an XML processor must pass to the application
849              *   the single character #xA.
850              */
851             if (c == '\r') {
852                 *len = ((cur[1] == '\n') ? 2 : 1);
853                 c = '\n';
854             } else if (c == 0) {
855                 if (ctxt->input->cur >= ctxt->input->end) {
856                     *len = 0;
857                 } else {
858                     *len = 1;
859                     /*
860                      * TODO: Null bytes should be handled by callers,
861                      * but this can be tricky.
862                      */
863                     xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
864                             "Char 0x0 out of allowed range\n", c);
865                 }
866             } else {
867                 *len = 1;
868             }
869         } else {
870             *len = 1;
871         }
872 
873         return(c);
874     } else {
875         int val;
876 
877         if (avail < 2)
878             goto incomplete_sequence;
879         if ((cur[1] & 0xc0) != 0x80)
880             goto encoding_error;
881 
882         if (c < 0xe0) {
883             /* 2-byte code */
884             if (c < 0xc2)
885                 goto encoding_error;
886             val = (c & 0x1f) << 6;
887             val |= cur[1] & 0x3f;
888             *len = 2;
889         } else {
890             if (avail < 3)
891                 goto incomplete_sequence;
892             if ((cur[2] & 0xc0) != 0x80)
893                 goto encoding_error;
894 
895             if (c < 0xf0) {
896                 /* 3-byte code */
897                 val = (c & 0xf) << 12;
898                 val |= (cur[1] & 0x3f) << 6;
899                 val |= cur[2] & 0x3f;
900                 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
901                     goto encoding_error;
902                 *len = 3;
903             } else {
904                 if (avail < 4)
905                     goto incomplete_sequence;
906                 if ((cur[3] & 0xc0) != 0x80)
907                     goto encoding_error;
908 
909                 /* 4-byte code */
910                 val = (c & 0x0f) << 18;
911                 val |= (cur[1] & 0x3f) << 12;
912                 val |= (cur[2] & 0x3f) << 6;
913                 val |= cur[3] & 0x3f;
914                 if ((val < 0x10000) || (val >= 0x110000))
915                     goto encoding_error;
916                 *len = 4;
917             }
918         }
919 
920         return(val);
921     }
922 
923 encoding_error:
924     /* Only report the first error */
925     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
926         if (ctxt->input->end - ctxt->input->cur < 4) {
927             __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
928                          "Input is not proper UTF-8, indicate encoding !\n",
929                          NULL, NULL);
930         } else {
931             char buffer[150];
932 
933             snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
934                             ctxt->input->cur[0], ctxt->input->cur[1],
935                             ctxt->input->cur[2], ctxt->input->cur[3]);
936             __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
937                          "Input is not proper UTF-8, indicate encoding !\n%s",
938                          BAD_CAST buffer, NULL);
939         }
940         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
941     }
942     *len = 1;
943     return(0xFFFD); /* U+FFFD Replacement Character */
944 
945 incomplete_sequence:
946     /*
947      * An encoding problem may arise from a truncated input buffer
948      * splitting a character in the middle. In that case do not raise
949      * an error but return 0. This should only happen when push parsing
950      * char data.
951      */
952     *len = 0;
953     return(0);
954 }
955 
956 /**
957  * xmlStringCurrentChar:
958  * @ctxt:  the XML parser context
959  * @cur:  pointer to the beginning of the char
960  * @len:  pointer to the length of the char read
961  *
962  * DEPRECATED: Internal function, do not use.
963  *
964  * The current char value, if using UTF-8 this may actually span multiple
965  * bytes in the input buffer.
966  *
967  * Returns the current char value and its length
968  */
969 
970 int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,const xmlChar * cur,int * len)971 xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
972                      const xmlChar *cur, int *len) {
973     int c;
974 
975     if ((cur == NULL) || (len == NULL))
976         return(0);
977 
978     /* cur is zero-terminated, so we can lie about its length. */
979     *len = 4;
980     c = xmlGetUTF8Char(cur, len);
981 
982     return((c < 0) ? 0 : c);
983 }
984 
985 /**
986  * xmlCopyCharMultiByte:
987  * @out:  pointer to an array of xmlChar
988  * @val:  the char value
989  *
990  * append the char value in the array
991  *
992  * Returns the number of xmlChar written
993  */
994 int
xmlCopyCharMultiByte(xmlChar * out,int val)995 xmlCopyCharMultiByte(xmlChar *out, int val) {
996     if ((out == NULL) || (val < 0)) return(0);
997     /*
998      * We are supposed to handle UTF8, check it's valid
999      * From rfc2044: encoding of the Unicode values on UTF-8:
1000      *
1001      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1002      * 0000 0000-0000 007F   0xxxxxxx
1003      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1004      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1005      */
1006     if  (val >= 0x80) {
1007 	xmlChar *savedout = out;
1008 	int bits;
1009 	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1010 	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1011 	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1012 	else {
1013 	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
1014 		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1015 			      val);
1016 	    return(0);
1017 	}
1018 	for ( ; bits >= 0; bits-= 6)
1019 	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
1020 	return (out - savedout);
1021     }
1022     *out = val;
1023     return 1;
1024 }
1025 
1026 /**
1027  * xmlCopyChar:
1028  * @len:  Ignored, compatibility
1029  * @out:  pointer to an array of xmlChar
1030  * @val:  the char value
1031  *
1032  * append the char value in the array
1033  *
1034  * Returns the number of xmlChar written
1035  */
1036 
1037 int
xmlCopyChar(int len ATTRIBUTE_UNUSED,xmlChar * out,int val)1038 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1039     if ((out == NULL) || (val < 0)) return(0);
1040     /* the len parameter is ignored */
1041     if  (val >= 0x80) {
1042 	return(xmlCopyCharMultiByte (out, val));
1043     }
1044     *out = val;
1045     return 1;
1046 }
1047 
1048 /************************************************************************
1049  *									*
1050  *		Commodity functions to switch encodings			*
1051  *									*
1052  ************************************************************************/
1053 
1054 static xmlCharEncodingHandlerPtr
xmlDetectEBCDIC(xmlParserInputPtr input)1055 xmlDetectEBCDIC(xmlParserInputPtr input) {
1056     xmlChar out[200];
1057     xmlCharEncodingHandlerPtr handler;
1058     int inlen, outlen, res, i;
1059 
1060     /*
1061      * To detect the EBCDIC code page, we convert the first 200 bytes
1062      * to EBCDIC-US and try to find the encoding declaration.
1063      */
1064     handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1065     if (handler == NULL)
1066         return(NULL);
1067     outlen = sizeof(out) - 1;
1068     inlen = input->end - input->cur;
1069     res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1070     if (res < 0)
1071         return(handler);
1072     out[outlen] = 0;
1073 
1074     for (i = 0; i < outlen; i++) {
1075         if (out[i] == '>')
1076             break;
1077         if ((out[i] == 'e') &&
1078             (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1079             int start, cur, quote;
1080 
1081             i += 8;
1082             while (IS_BLANK_CH(out[i]))
1083                 i += 1;
1084             if (out[i++] != '=')
1085                 break;
1086             while (IS_BLANK_CH(out[i]))
1087                 i += 1;
1088             quote = out[i++];
1089             if ((quote != '\'') && (quote != '"'))
1090                 break;
1091             start = i;
1092             cur = out[i];
1093             while (((cur >= 'a') && (cur <= 'z')) ||
1094                    ((cur >= 'A') && (cur <= 'Z')) ||
1095                    ((cur >= '0') && (cur <= '9')) ||
1096                    (cur == '.') || (cur == '_') ||
1097                    (cur == '-'))
1098                 cur = out[++i];
1099             if (cur != quote)
1100                 break;
1101             out[i] = 0;
1102             xmlCharEncCloseFunc(handler);
1103             return(xmlFindCharEncodingHandler((char *) out + start));
1104         }
1105     }
1106 
1107     /*
1108      * ICU handlers are stateful, so we have to recreate them.
1109      */
1110     xmlCharEncCloseFunc(handler);
1111     return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC));
1112 }
1113 
1114 /**
1115  * xmlSwitchEncoding:
1116  * @ctxt:  the parser context
1117  * @enc:  the encoding value (number)
1118  *
1119  * Use encoding specified by enum to decode input data.
1120  *
1121  * This function can be used to enforce the encoding of chunks passed
1122  * to xmlParseChunk.
1123  *
1124  * Returns 0 in case of success, -1 otherwise
1125  */
1126 int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt,xmlCharEncoding enc)1127 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1128 {
1129     xmlCharEncodingHandlerPtr handler = NULL;
1130     int check = 1;
1131     int ret;
1132 
1133     if ((ctxt == NULL) || (ctxt->input == NULL))
1134         return(-1);
1135 
1136     switch (enc) {
1137 	case XML_CHAR_ENCODING_NONE:
1138 	case XML_CHAR_ENCODING_UTF8:
1139         case XML_CHAR_ENCODING_ASCII:
1140             check = 0;
1141             break;
1142         case XML_CHAR_ENCODING_EBCDIC:
1143             handler = xmlDetectEBCDIC(ctxt->input);
1144             break;
1145         default:
1146             handler = xmlGetCharEncodingHandler(enc);
1147             break;
1148     }
1149 
1150     if ((check) && (handler == NULL)) {
1151         const char *name = xmlGetCharEncodingName(enc);
1152 
1153         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1154                 "encoding not supported: %s\n",
1155                 BAD_CAST (name ? name : "<null>"), NULL);
1156         /*
1157          * TODO: We could recover from errors in external entities
1158          * if we didn't stop the parser. But most callers of this
1159          * function don't check the return value.
1160          */
1161         xmlStopParser(ctxt);
1162         return(-1);
1163     }
1164 
1165     ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1166 
1167     if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1168         ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1169     }
1170 
1171     return(ret);
1172 }
1173 
1174 /**
1175  * xmlSwitchInputEncoding:
1176  * @ctxt:  the parser context
1177  * @input:  the input stream
1178  * @handler:  the encoding handler
1179  *
1180  * DEPRECATED: Internal function, don't use.
1181  *
1182  * Use encoding handler to decode input data.
1183  *
1184  * Returns 0 in case of success, -1 otherwise
1185  */
1186 int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlCharEncodingHandlerPtr handler)1187 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1188                        xmlCharEncodingHandlerPtr handler)
1189 {
1190     int nbchars;
1191     xmlParserInputBufferPtr in;
1192 
1193     if ((input == NULL) || (input->buf == NULL)) {
1194         xmlCharEncCloseFunc(handler);
1195 	return (-1);
1196     }
1197     in = input->buf;
1198 
1199     input->flags |= XML_INPUT_HAS_ENCODING;
1200 
1201     /*
1202      * UTF-8 requires no encoding handler.
1203      */
1204     if ((handler != NULL) &&
1205         (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1206         xmlCharEncCloseFunc(handler);
1207         handler = NULL;
1208     }
1209 
1210     if (in->encoder == handler)
1211         return (0);
1212 
1213     if (in->encoder != NULL) {
1214         /*
1215          * Switching encodings during parsing is a really bad idea,
1216          * but Chromium can switch between ISO-8859-1 and UTF-16 before
1217          * separate calls to xmlParseChunk.
1218          *
1219          * TODO: We should check whether the "raw" input buffer is empty and
1220          * convert the old content using the old encoder.
1221          */
1222 
1223         xmlCharEncCloseFunc(in->encoder);
1224         in->encoder = handler;
1225         return (0);
1226     }
1227 
1228     in->encoder = handler;
1229 
1230     /*
1231      * Is there already some content down the pipe to convert ?
1232      */
1233     if (xmlBufIsEmpty(in->buffer) == 0) {
1234         size_t processed;
1235 
1236         /*
1237          * Shrink the current input buffer.
1238          * Move it as the raw buffer and create a new input buffer
1239          */
1240         processed = input->cur - input->base;
1241         xmlBufShrink(in->buffer, processed);
1242         input->consumed += processed;
1243         in->raw = in->buffer;
1244         in->buffer = xmlBufCreate();
1245         in->rawconsumed = processed;
1246 
1247         nbchars = xmlCharEncInput(in);
1248         xmlBufResetInput(in->buffer, input);
1249         if (nbchars < 0) {
1250             /* TODO: This could be an out of memory or an encoding error. */
1251             xmlErrInternal(ctxt,
1252                            "switching encoding: encoder error\n",
1253                            NULL);
1254             xmlHaltParser(ctxt);
1255             return (-1);
1256         }
1257     }
1258     return (0);
1259 }
1260 
1261 /**
1262  * xmlSwitchToEncoding:
1263  * @ctxt:  the parser context
1264  * @handler:  the encoding handler
1265  *
1266  * Use encoding handler to decode input data.
1267  *
1268  * This function can be used to enforce the encoding of chunks passed
1269  * to xmlParseChunk.
1270  *
1271  * Returns 0 in case of success, -1 otherwise
1272  */
1273 int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt,xmlCharEncodingHandlerPtr handler)1274 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1275 {
1276     if (ctxt == NULL)
1277         return(-1);
1278     return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1279 }
1280 
1281 /**
1282  * xmlDetectEncoding:
1283  * @ctxt:  the parser context
1284  *
1285  * Handle optional BOM, detect and switch to encoding.
1286  *
1287  * Assumes that there are at least four bytes in the input buffer.
1288  */
1289 void
xmlDetectEncoding(xmlParserCtxtPtr ctxt)1290 xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1291     const xmlChar *in;
1292     xmlCharEncoding enc;
1293     int bomSize;
1294     int autoFlag = 0;
1295 
1296     if (xmlParserGrow(ctxt) < 0)
1297         return;
1298     in = ctxt->input->cur;
1299     if (ctxt->input->end - in < 4)
1300         return;
1301 
1302     if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1303         /*
1304          * If the encoding was already set, only skip the BOM which was
1305          * possibly decoded to UTF-8.
1306          */
1307         if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1308             ctxt->input->cur += 3;
1309         }
1310 
1311         return;
1312     }
1313 
1314     enc = XML_CHAR_ENCODING_NONE;
1315     bomSize = 0;
1316 
1317     switch (in[0]) {
1318         case 0x00:
1319             if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1320                 enc = XML_CHAR_ENCODING_UCS4BE;
1321                 autoFlag = XML_INPUT_AUTO_OTHER;
1322             } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1323                 enc = XML_CHAR_ENCODING_UTF16BE;
1324                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1325             }
1326             break;
1327 
1328         case 0x3C:
1329             if (in[1] == 0x00) {
1330                 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1331                     enc = XML_CHAR_ENCODING_UCS4LE;
1332                     autoFlag = XML_INPUT_AUTO_OTHER;
1333                 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1334                     enc = XML_CHAR_ENCODING_UTF16LE;
1335                     autoFlag = XML_INPUT_AUTO_UTF16LE;
1336                 }
1337             }
1338             break;
1339 
1340         case 0x4C:
1341 	    if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1342 	        enc = XML_CHAR_ENCODING_EBCDIC;
1343                 autoFlag = XML_INPUT_AUTO_OTHER;
1344             }
1345             break;
1346 
1347         case 0xEF:
1348             if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1349                 enc = XML_CHAR_ENCODING_UTF8;
1350                 autoFlag = XML_INPUT_AUTO_UTF8;
1351                 bomSize = 3;
1352             }
1353             break;
1354 
1355         case 0xFE:
1356             if (in[1] == 0xFF) {
1357                 enc = XML_CHAR_ENCODING_UTF16BE;
1358                 autoFlag = XML_INPUT_AUTO_UTF16BE;
1359                 bomSize = 2;
1360             }
1361             break;
1362 
1363         case 0xFF:
1364             if (in[1] == 0xFE) {
1365                 enc = XML_CHAR_ENCODING_UTF16LE;
1366                 autoFlag = XML_INPUT_AUTO_UTF16LE;
1367                 bomSize = 2;
1368             }
1369             break;
1370     }
1371 
1372     if (bomSize > 0) {
1373         ctxt->input->cur += bomSize;
1374     }
1375 
1376     if (enc != XML_CHAR_ENCODING_NONE) {
1377         ctxt->input->flags |= autoFlag;
1378         xmlSwitchEncoding(ctxt, enc);
1379     }
1380 }
1381 
1382 /**
1383  * xmlSetDeclaredEncoding:
1384  * @ctxt:  the parser context
1385  * @encoding:  declared encoding
1386  *
1387  * Set the encoding from a declaration in the document.
1388  *
1389  * If no encoding was set yet, switch the encoding. Otherwise, only warn
1390  * about encoding mismatches.
1391  *
1392  * Takes ownership of 'encoding'.
1393  */
1394 void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt,xmlChar * encoding)1395 xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1396     if (ctxt->encoding != NULL)
1397         xmlFree((xmlChar *) ctxt->encoding);
1398     ctxt->encoding = encoding;
1399 
1400     if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1401         ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1402         xmlCharEncodingHandlerPtr handler;
1403 
1404         handler = xmlFindCharEncodingHandler((const char *) encoding);
1405         if (handler == NULL) {
1406             __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1407                              "Unsupported encoding: %s\n",
1408                              encoding, NULL);
1409             return;
1410         }
1411 
1412         xmlSwitchToEncoding(ctxt, handler);
1413         ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1414     } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1415         static const char *allowedUTF8[] = {
1416             "UTF-8", "UTF8", NULL
1417         };
1418         static const char *allowedUTF16LE[] = {
1419             "UTF-16", "UTF-16LE", "UTF16", NULL
1420         };
1421         static const char *allowedUTF16BE[] = {
1422             "UTF-16", "UTF-16BE", "UTF16", NULL
1423         };
1424         const char **allowed = NULL;
1425         const char *autoEnc = NULL;
1426 
1427         switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1428             case XML_INPUT_AUTO_UTF8:
1429                 allowed = allowedUTF8;
1430                 autoEnc = "UTF-8";
1431                 break;
1432             case XML_INPUT_AUTO_UTF16LE:
1433                 allowed = allowedUTF16LE;
1434                 autoEnc = "UTF-16LE";
1435                 break;
1436             case XML_INPUT_AUTO_UTF16BE:
1437                 allowed = allowedUTF16BE;
1438                 autoEnc = "UTF-16BE";
1439                 break;
1440         }
1441 
1442         if (allowed != NULL) {
1443             const char **p;
1444             int match = 0;
1445 
1446             for (p = allowed; *p != NULL; p++) {
1447                 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1448                     match = 1;
1449                     break;
1450                 }
1451             }
1452 
1453             if (match == 0) {
1454                 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1455                               "Encoding '%s' doesn't match "
1456                               "auto-detected '%s'\n",
1457                               encoding, BAD_CAST autoEnc);
1458             }
1459         }
1460     }
1461 }
1462 
1463 /************************************************************************
1464  *									*
1465  *	Commodity functions to handle entities processing		*
1466  *									*
1467  ************************************************************************/
1468 
1469 /**
1470  * xmlFreeInputStream:
1471  * @input:  an xmlParserInputPtr
1472  *
1473  * Free up an input stream.
1474  */
1475 void
xmlFreeInputStream(xmlParserInputPtr input)1476 xmlFreeInputStream(xmlParserInputPtr input) {
1477     if (input == NULL) return;
1478 
1479     if (input->filename != NULL) xmlFree((char *) input->filename);
1480     if (input->directory != NULL) xmlFree((char *) input->directory);
1481     if (input->version != NULL) xmlFree((char *) input->version);
1482     if ((input->free != NULL) && (input->base != NULL))
1483         input->free((xmlChar *) input->base);
1484     if (input->buf != NULL)
1485         xmlFreeParserInputBuffer(input->buf);
1486     xmlFree(input);
1487 }
1488 
1489 /**
1490  * xmlNewInputStream:
1491  * @ctxt:  an XML parser context
1492  *
1493  * Create a new input stream structure.
1494  *
1495  * Returns the new input stream or NULL
1496  */
1497 xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt)1498 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1499     xmlParserInputPtr input;
1500 
1501     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1502     if (input == NULL) {
1503         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1504 	return(NULL);
1505     }
1506     memset(input, 0, sizeof(xmlParserInput));
1507     input->line = 1;
1508     input->col = 1;
1509 
1510     /*
1511      * If the context is NULL the id cannot be initialized, but that
1512      * should not happen while parsing which is the situation where
1513      * the id is actually needed.
1514      */
1515     if (ctxt != NULL) {
1516         if (input->id >= INT_MAX) {
1517             xmlErrMemory(ctxt, "Input ID overflow\n");
1518             return(NULL);
1519         }
1520         input->id = ctxt->input_id++;
1521     }
1522 
1523     return(input);
1524 }
1525 
1526 /**
1527  * xmlNewIOInputStream:
1528  * @ctxt:  an XML parser context
1529  * @input:  an I/O Input
1530  * @enc:  the charset encoding if known
1531  *
1532  * Create a new input stream structure encapsulating the @input into
1533  * a stream suitable for the parser.
1534  *
1535  * Returns the new input stream or NULL
1536  */
1537 xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt,xmlParserInputBufferPtr input,xmlCharEncoding enc)1538 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1539 	            xmlCharEncoding enc) {
1540     xmlParserInputPtr inputStream;
1541 
1542     if (input == NULL) return(NULL);
1543     if (xmlParserDebugEntities)
1544 	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1545     inputStream = xmlNewInputStream(ctxt);
1546     if (inputStream == NULL) {
1547 	return(NULL);
1548     }
1549     inputStream->filename = NULL;
1550     inputStream->buf = input;
1551     xmlBufResetInput(inputStream->buf->buffer, inputStream);
1552 
1553     if (enc != XML_CHAR_ENCODING_NONE) {
1554         xmlSwitchEncoding(ctxt, enc);
1555     }
1556 
1557     return(inputStream);
1558 }
1559 
1560 /**
1561  * xmlNewEntityInputStream:
1562  * @ctxt:  an XML parser context
1563  * @entity:  an Entity pointer
1564  *
1565  * DEPRECATED: Internal function, do not use.
1566  *
1567  * Create a new input stream based on an xmlEntityPtr
1568  *
1569  * Returns the new input stream or NULL
1570  */
1571 xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)1572 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1573     xmlParserInputPtr input;
1574 
1575     if (entity == NULL) {
1576         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1577 	               NULL);
1578 	return(NULL);
1579     }
1580     if (xmlParserDebugEntities)
1581 	xmlGenericError(xmlGenericErrorContext,
1582 		"new input from entity: %s\n", entity->name);
1583     if (entity->content == NULL) {
1584 	switch (entity->etype) {
1585             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1586 	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1587 		               entity->name);
1588                 break;
1589             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1590             case XML_EXTERNAL_PARAMETER_ENTITY:
1591 		input = xmlLoadExternalEntity((char *) entity->URI,
1592 		       (char *) entity->ExternalID, ctxt);
1593                 if (input != NULL)
1594                     input->entity = entity;
1595                 return(input);
1596             case XML_INTERNAL_GENERAL_ENTITY:
1597 	        xmlErrInternal(ctxt,
1598 		      "Internal entity %s without content !\n",
1599 		               entity->name);
1600                 break;
1601             case XML_INTERNAL_PARAMETER_ENTITY:
1602 	        xmlErrInternal(ctxt,
1603 		      "Internal parameter entity %s without content !\n",
1604 		               entity->name);
1605                 break;
1606             case XML_INTERNAL_PREDEFINED_ENTITY:
1607 	        xmlErrInternal(ctxt,
1608 		      "Predefined entity %s without content !\n",
1609 		               entity->name);
1610                 break;
1611 	}
1612 	return(NULL);
1613     }
1614     input = xmlNewInputStream(ctxt);
1615     if (input == NULL) {
1616 	return(NULL);
1617     }
1618     if (entity->URI != NULL)
1619 	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1620     input->base = entity->content;
1621     if (entity->length == 0)
1622         entity->length = xmlStrlen(entity->content);
1623     input->cur = entity->content;
1624     input->length = entity->length;
1625     input->end = &entity->content[input->length];
1626     input->entity = entity;
1627     return(input);
1628 }
1629 
1630 /**
1631  * xmlNewStringInputStream:
1632  * @ctxt:  an XML parser context
1633  * @buffer:  an memory buffer
1634  *
1635  * Create a new input stream based on a memory buffer.
1636  * Returns the new input stream
1637  */
1638 xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt,const xmlChar * buffer)1639 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1640     xmlParserInputPtr input;
1641     xmlParserInputBufferPtr buf;
1642 
1643     if (buffer == NULL) {
1644         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1645 	               NULL);
1646 	return(NULL);
1647     }
1648     if (xmlParserDebugEntities)
1649 	xmlGenericError(xmlGenericErrorContext,
1650 		"new fixed input: %.30s\n", buffer);
1651     buf = xmlParserInputBufferCreateString(buffer);
1652     if (buf == NULL) {
1653 	xmlErrMemory(ctxt, NULL);
1654         return(NULL);
1655     }
1656     input = xmlNewInputStream(ctxt);
1657     if (input == NULL) {
1658         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1659 	xmlFreeParserInputBuffer(buf);
1660 	return(NULL);
1661     }
1662     input->buf = buf;
1663     xmlBufResetInput(input->buf->buffer, input);
1664     return(input);
1665 }
1666 
1667 /**
1668  * xmlNewInputFromFile:
1669  * @ctxt:  an XML parser context
1670  * @filename:  the filename to use as entity
1671  *
1672  * Create a new input stream based on a file or an URL.
1673  *
1674  * Returns the new input stream or NULL in case of error
1675  */
1676 xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt,const char * filename)1677 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1678     xmlParserInputBufferPtr buf;
1679     xmlParserInputPtr inputStream;
1680     char *directory = NULL;
1681     xmlChar *URI = NULL;
1682 
1683     if (xmlParserDebugEntities)
1684 	xmlGenericError(xmlGenericErrorContext,
1685 		"new input from file: %s\n", filename);
1686     if (ctxt == NULL) return(NULL);
1687     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1688     if (buf == NULL) {
1689 	if (filename == NULL)
1690 	    __xmlLoaderErr(ctxt,
1691 	                   "failed to load external entity: NULL filename \n",
1692 			   NULL);
1693 	else
1694 	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1695 			   (const char *) filename);
1696 	return(NULL);
1697     }
1698 
1699     inputStream = xmlNewInputStream(ctxt);
1700     if (inputStream == NULL) {
1701 	xmlFreeParserInputBuffer(buf);
1702 	return(NULL);
1703     }
1704 
1705     inputStream->buf = buf;
1706     inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1707     if (inputStream == NULL)
1708         return(NULL);
1709 
1710     if (inputStream->filename == NULL)
1711 	URI = xmlStrdup((xmlChar *) filename);
1712     else
1713 	URI = xmlStrdup((xmlChar *) inputStream->filename);
1714     directory = xmlParserGetDirectory((const char *) URI);
1715     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1716     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1717     if (URI != NULL) xmlFree((char *) URI);
1718     inputStream->directory = directory;
1719 
1720     xmlBufResetInput(inputStream->buf->buffer, inputStream);
1721     if ((ctxt->directory == NULL) && (directory != NULL))
1722         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1723     return(inputStream);
1724 }
1725 
1726 /************************************************************************
1727  *									*
1728  *		Commodity functions to handle parser contexts		*
1729  *									*
1730  ************************************************************************/
1731 
1732 /**
1733  * xmlInitSAXParserCtxt:
1734  * @ctxt:  XML parser context
 * @sax:  SAX handler
1736  * @userData:  user data
1737  *
1738  * Initialize a SAX parser context
1739  *
1740  * Returns 0 in case of success and -1 in case of error
1741  */
1742 
1743 static int
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt,const xmlSAXHandler * sax,void * userData)1744 xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1745                      void *userData)
1746 {
1747     xmlParserInputPtr input;
1748 
1749     if(ctxt==NULL) {
1750         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1751         return(-1);
1752     }
1753 
1754     xmlInitParser();
1755 
1756     if (ctxt->dict == NULL)
1757 	ctxt->dict = xmlDictCreate();
1758     if (ctxt->dict == NULL) {
1759         xmlErrMemory(NULL, "cannot initialize parser context\n");
1760 	return(-1);
1761     }
1762     xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1763 
1764     if (ctxt->sax == NULL)
1765 	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1766     if (ctxt->sax == NULL) {
1767         xmlErrMemory(NULL, "cannot initialize parser context\n");
1768 	return(-1);
1769     }
1770     if (sax == NULL) {
1771 	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1772         xmlSAXVersion(ctxt->sax, 2);
1773         ctxt->userData = ctxt;
1774     } else {
1775 	if (sax->initialized == XML_SAX2_MAGIC) {
1776 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1777         } else {
1778 	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1779 	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1780         }
1781         ctxt->userData = userData ? userData : ctxt;
1782     }
1783 
1784     ctxt->maxatts = 0;
1785     ctxt->atts = NULL;
1786     /* Allocate the Input stack */
1787     if (ctxt->inputTab == NULL) {
1788 	ctxt->inputTab = (xmlParserInputPtr *)
1789 		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
1790 	ctxt->inputMax = 5;
1791     }
1792     if (ctxt->inputTab == NULL) {
1793         xmlErrMemory(NULL, "cannot initialize parser context\n");
1794 	ctxt->inputNr = 0;
1795 	ctxt->inputMax = 0;
1796 	ctxt->input = NULL;
1797 	return(-1);
1798     }
1799     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1800         xmlFreeInputStream(input);
1801     }
1802     ctxt->inputNr = 0;
1803     ctxt->input = NULL;
1804 
1805     ctxt->version = NULL;
1806     ctxt->encoding = NULL;
1807     ctxt->standalone = -1;
1808     ctxt->hasExternalSubset = 0;
1809     ctxt->hasPErefs = 0;
1810     ctxt->html = 0;
1811     ctxt->external = 0;
1812     ctxt->instate = XML_PARSER_START;
1813     ctxt->token = 0;
1814     ctxt->directory = NULL;
1815 
1816     /* Allocate the Node stack */
1817     if (ctxt->nodeTab == NULL) {
1818 	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1819 	ctxt->nodeMax = 10;
1820     }
1821     if (ctxt->nodeTab == NULL) {
1822         xmlErrMemory(NULL, "cannot initialize parser context\n");
1823 	ctxt->nodeNr = 0;
1824 	ctxt->nodeMax = 0;
1825 	ctxt->node = NULL;
1826 	ctxt->inputNr = 0;
1827 	ctxt->inputMax = 0;
1828 	ctxt->input = NULL;
1829 	return(-1);
1830     }
1831     ctxt->nodeNr = 0;
1832     ctxt->node = NULL;
1833 
1834     /* Allocate the Name stack */
1835     if (ctxt->nameTab == NULL) {
1836 	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1837 	ctxt->nameMax = 10;
1838     }
1839     if (ctxt->nameTab == NULL) {
1840         xmlErrMemory(NULL, "cannot initialize parser context\n");
1841 	ctxt->nodeNr = 0;
1842 	ctxt->nodeMax = 0;
1843 	ctxt->node = NULL;
1844 	ctxt->inputNr = 0;
1845 	ctxt->inputMax = 0;
1846 	ctxt->input = NULL;
1847 	ctxt->nameNr = 0;
1848 	ctxt->nameMax = 0;
1849 	ctxt->name = NULL;
1850 	return(-1);
1851     }
1852     ctxt->nameNr = 0;
1853     ctxt->name = NULL;
1854 
1855     /* Allocate the space stack */
1856     if (ctxt->spaceTab == NULL) {
1857 	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1858 	ctxt->spaceMax = 10;
1859     }
1860     if (ctxt->spaceTab == NULL) {
1861         xmlErrMemory(NULL, "cannot initialize parser context\n");
1862 	ctxt->nodeNr = 0;
1863 	ctxt->nodeMax = 0;
1864 	ctxt->node = NULL;
1865 	ctxt->inputNr = 0;
1866 	ctxt->inputMax = 0;
1867 	ctxt->input = NULL;
1868 	ctxt->nameNr = 0;
1869 	ctxt->nameMax = 0;
1870 	ctxt->name = NULL;
1871 	ctxt->spaceNr = 0;
1872 	ctxt->spaceMax = 0;
1873 	ctxt->space = NULL;
1874 	return(-1);
1875     }
1876     ctxt->spaceNr = 1;
1877     ctxt->spaceMax = 10;
1878     ctxt->spaceTab[0] = -1;
1879     ctxt->space = &ctxt->spaceTab[0];
1880     ctxt->myDoc = NULL;
1881     ctxt->wellFormed = 1;
1882     ctxt->nsWellFormed = 1;
1883     ctxt->valid = 1;
1884     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1885     if (ctxt->loadsubset) {
1886         ctxt->options |= XML_PARSE_DTDLOAD;
1887     }
1888     ctxt->validate = xmlDoValidityCheckingDefaultValue;
1889     ctxt->pedantic = xmlPedanticParserDefaultValue;
1890     if (ctxt->pedantic) {
1891         ctxt->options |= XML_PARSE_PEDANTIC;
1892     }
1893     ctxt->linenumbers = xmlLineNumbersDefaultValue;
1894     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1895     if (ctxt->keepBlanks == 0) {
1896 	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1897 	ctxt->options |= XML_PARSE_NOBLANKS;
1898     }
1899 
1900     ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1901     ctxt->vctxt.userData = ctxt;
1902     ctxt->vctxt.error = xmlParserValidityError;
1903     ctxt->vctxt.warning = xmlParserValidityWarning;
1904     if (ctxt->validate) {
1905 	if (xmlGetWarningsDefaultValue == 0)
1906 	    ctxt->vctxt.warning = NULL;
1907 	else
1908 	    ctxt->vctxt.warning = xmlParserValidityWarning;
1909 	ctxt->vctxt.nodeMax = 0;
1910         ctxt->options |= XML_PARSE_DTDVALID;
1911     }
1912     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1913     if (ctxt->replaceEntities) {
1914         ctxt->options |= XML_PARSE_NOENT;
1915     }
1916     ctxt->record_info = 0;
1917     ctxt->checkIndex = 0;
1918     ctxt->inSubset = 0;
1919     ctxt->errNo = XML_ERR_OK;
1920     ctxt->depth = 0;
1921     ctxt->catalogs = NULL;
1922     ctxt->sizeentities = 0;
1923     ctxt->sizeentcopy = 0;
1924     ctxt->input_id = 1;
1925     ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
1926     xmlInitNodeInfoSeq(&ctxt->node_seq);
1927 
1928     if (ctxt->nsdb == NULL) {
1929         ctxt->nsdb = xmlParserNsCreate();
1930         if (ctxt->nsdb == NULL) {
1931             xmlErrMemory(ctxt, NULL);
1932             return(-1);
1933         }
1934     }
1935 
1936     return(0);
1937 }
1938 
1939 /**
1940  * xmlInitParserCtxt:
1941  * @ctxt:  an XML parser context
1942  *
1943  * DEPRECATED: Internal function which will be made private in a future
1944  * version.
1945  *
1946  * Initialize a parser context
1947  *
1948  * Returns 0 in case of success and -1 in case of error
1949  */
1950 
1951 int
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)1952 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1953 {
1954     return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1955 }
1956 
1957 /**
1958  * xmlFreeParserCtxt:
1959  * @ctxt:  an XML parser context
1960  *
1961  * Free all the memory used by a parser context. However the parsed
1962  * document in ctxt->myDoc is not freed.
1963  */
1964 
1965 void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)1966 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1967 {
1968     xmlParserInputPtr input;
1969 
1970     if (ctxt == NULL) return;
1971 
1972     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1973         xmlFreeInputStream(input);
1974     }
1975     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1976     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1977     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1978     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1979     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1980     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1981     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1982     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1983     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1984 #ifdef LIBXML_SAX1_ENABLED
1985     if ((ctxt->sax != NULL) &&
1986         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1987 #else
1988     if (ctxt->sax != NULL)
1989 #endif /* LIBXML_SAX1_ENABLED */
1990         xmlFree(ctxt->sax);
1991     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1992     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1993     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1994     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1995     if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
1996     if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
1997     if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
1998     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1999     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2000     if (ctxt->attsDefault != NULL)
2001         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2002     if (ctxt->attsSpecial != NULL)
2003         xmlHashFree(ctxt->attsSpecial, NULL);
2004     if (ctxt->freeElems != NULL) {
2005         xmlNodePtr cur, next;
2006 
2007 	cur = ctxt->freeElems;
2008 	while (cur != NULL) {
2009 	    next = cur->next;
2010 	    xmlFree(cur);
2011 	    cur = next;
2012 	}
2013     }
2014     if (ctxt->freeAttrs != NULL) {
2015         xmlAttrPtr cur, next;
2016 
2017 	cur = ctxt->freeAttrs;
2018 	while (cur != NULL) {
2019 	    next = cur->next;
2020 	    xmlFree(cur);
2021 	    cur = next;
2022 	}
2023     }
2024     /*
2025      * cleanup the error strings
2026      */
2027     if (ctxt->lastError.message != NULL)
2028         xmlFree(ctxt->lastError.message);
2029     if (ctxt->lastError.file != NULL)
2030         xmlFree(ctxt->lastError.file);
2031     if (ctxt->lastError.str1 != NULL)
2032         xmlFree(ctxt->lastError.str1);
2033     if (ctxt->lastError.str2 != NULL)
2034         xmlFree(ctxt->lastError.str2);
2035     if (ctxt->lastError.str3 != NULL)
2036         xmlFree(ctxt->lastError.str3);
2037 
2038 #ifdef LIBXML_CATALOG_ENABLED
2039     if (ctxt->catalogs != NULL)
2040 	xmlCatalogFreeLocal(ctxt->catalogs);
2041 #endif
2042     xmlFree(ctxt);
2043 }
2044 
2045 /**
2046  * xmlNewParserCtxt:
2047  *
2048  * Allocate and initialize a new parser context.
2049  *
2050  * Returns the xmlParserCtxtPtr or NULL
2051  */
2052 
2053 xmlParserCtxtPtr
xmlNewParserCtxt(void)2054 xmlNewParserCtxt(void)
2055 {
2056     return(xmlNewSAXParserCtxt(NULL, NULL));
2057 }
2058 
2059 /**
2060  * xmlNewSAXParserCtxt:
2061  * @sax:  SAX handler
2062  * @userData:  user data
2063  *
2064  * Allocate and initialize a new SAX parser context. If userData is NULL,
2065  * the parser context will be passed as user data.
2066  *
2067  * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2068  */
2069 
2070 xmlParserCtxtPtr
xmlNewSAXParserCtxt(const xmlSAXHandler * sax,void * userData)2071 xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2072 {
2073     xmlParserCtxtPtr ctxt;
2074 
2075     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2076     if (ctxt == NULL) {
2077 	xmlErrMemory(NULL, "cannot allocate parser context\n");
2078 	return(NULL);
2079     }
2080     memset(ctxt, 0, sizeof(xmlParserCtxt));
2081     if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2082         xmlFreeParserCtxt(ctxt);
2083 	return(NULL);
2084     }
2085     return(ctxt);
2086 }
2087 
2088 /************************************************************************
2089  *									*
2090  *		Handling of node information				*
2091  *									*
2092  ************************************************************************/
2093 
2094 /**
2095  * xmlClearParserCtxt:
2096  * @ctxt:  an XML parser context
2097  *
2098  * Clear (release owned resources) and reinitialize a parser context
2099  */
2100 
2101 void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)2102 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2103 {
2104   if (ctxt==NULL)
2105     return;
2106   xmlClearNodeInfoSeq(&ctxt->node_seq);
2107   xmlCtxtReset(ctxt);
2108 }
2109 
2110 
2111 /**
2112  * xmlParserFindNodeInfo:
2113  * @ctx:  an XML parser context
2114  * @node:  an XML node within the tree
2115  *
2116  * DEPRECATED: Don't use.
2117  *
2118  * Find the parser node info struct for a given node
2119  *
2120  * Returns an xmlParserNodeInfo block pointer or NULL
2121  */
2122 const xmlParserNodeInfo *
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx,const xmlNodePtr node)2123 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
2124 {
2125     unsigned long pos;
2126 
2127     if ((ctx == NULL) || (node == NULL))
2128         return (NULL);
2129     /* Find position where node should be at */
2130     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2131     if (pos < ctx->node_seq.length
2132         && ctx->node_seq.buffer[pos].node == node)
2133         return &ctx->node_seq.buffer[pos];
2134     else
2135         return NULL;
2136 }
2137 
2138 
2139 /**
2140  * xmlInitNodeInfoSeq:
2141  * @seq:  a node info sequence pointer
2142  *
2143  * DEPRECATED: Don't use.
2144  *
2145  * -- Initialize (set to initial state) node info sequence
2146  */
2147 void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2148 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2149 {
2150     if (seq == NULL)
2151         return;
2152     seq->length = 0;
2153     seq->maximum = 0;
2154     seq->buffer = NULL;
2155 }
2156 
2157 /**
2158  * xmlClearNodeInfoSeq:
2159  * @seq:  a node info sequence pointer
2160  *
2161  * DEPRECATED: Don't use.
2162  *
2163  * -- Clear (release memory and reinitialize) node
2164  *   info sequence
2165  */
2166 void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)2167 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2168 {
2169     if (seq == NULL)
2170         return;
2171     if (seq->buffer != NULL)
2172         xmlFree(seq->buffer);
2173     xmlInitNodeInfoSeq(seq);
2174 }
2175 
2176 /**
2177  * xmlParserFindNodeInfoIndex:
2178  * @seq:  a node info sequence pointer
2179  * @node:  an XML node pointer
2180  *
2181  * DEPRECATED: Don't use.
2182  *
2183  * xmlParserFindNodeInfoIndex : Find the index that the info record for
2184  *   the given node is or should be at in a sorted sequence
2185  *
2186  * Returns a long indicating the position of the record
2187  */
2188 unsigned long
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,const xmlNodePtr node)2189 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2190                            const xmlNodePtr node)
2191 {
2192     unsigned long upper, lower, middle;
2193     int found = 0;
2194 
2195     if ((seq == NULL) || (node == NULL))
2196         return ((unsigned long) -1);
2197 
2198     /* Do a binary search for the key */
2199     lower = 1;
2200     upper = seq->length;
2201     middle = 0;
2202     while (lower <= upper && !found) {
2203         middle = lower + (upper - lower) / 2;
2204         if (node == seq->buffer[middle - 1].node)
2205             found = 1;
2206         else if (node < seq->buffer[middle - 1].node)
2207             upper = middle - 1;
2208         else
2209             lower = middle + 1;
2210     }
2211 
2212     /* Return position */
2213     if (middle == 0 || seq->buffer[middle - 1].node < node)
2214         return middle;
2215     else
2216         return middle - 1;
2217 }
2218 
2219 
2220 /**
2221  * xmlParserAddNodeInfo:
2222  * @ctxt:  an XML parser context
2223  * @info:  a node info sequence pointer
2224  *
2225  * DEPRECATED: Don't use.
2226  *
2227  * Insert node info record into the sorted sequence
2228  */
2229 void
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,const xmlParserNodeInfoPtr info)2230 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2231                      const xmlParserNodeInfoPtr info)
2232 {
2233     unsigned long pos;
2234 
2235     if ((ctxt == NULL) || (info == NULL)) return;
2236 
2237     /* Find pos and check to see if node is already in the sequence */
2238     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2239                                      info->node);
2240 
2241     if ((pos < ctxt->node_seq.length) &&
2242         (ctxt->node_seq.buffer != NULL) &&
2243         (ctxt->node_seq.buffer[pos].node == info->node)) {
2244         ctxt->node_seq.buffer[pos] = *info;
2245     }
2246 
2247     /* Otherwise, we need to add new node to buffer */
2248     else {
2249         if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2250 	    (ctxt->node_seq.buffer == NULL)) {
2251             xmlParserNodeInfo *tmp_buffer;
2252             unsigned int byte_size;
2253 
2254             if (ctxt->node_seq.maximum == 0)
2255                 ctxt->node_seq.maximum = 2;
2256             byte_size = (sizeof(*ctxt->node_seq.buffer) *
2257 			(2 * ctxt->node_seq.maximum));
2258 
2259             if (ctxt->node_seq.buffer == NULL)
2260                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2261             else
2262                 tmp_buffer =
2263                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2264                                                      byte_size);
2265 
2266             if (tmp_buffer == NULL) {
2267 		xmlErrMemory(ctxt, "failed to allocate buffer\n");
2268                 return;
2269             }
2270             ctxt->node_seq.buffer = tmp_buffer;
2271             ctxt->node_seq.maximum *= 2;
2272         }
2273 
2274         /* If position is not at end, move elements out of the way */
2275         if (pos != ctxt->node_seq.length) {
2276             unsigned long i;
2277 
2278             for (i = ctxt->node_seq.length; i > pos; i--)
2279                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2280         }
2281 
2282         /* Copy element and increase length */
2283         ctxt->node_seq.buffer[pos] = *info;
2284         ctxt->node_seq.length++;
2285     }
2286 }
2287 
2288 /************************************************************************
2289  *									*
2290  *		Defaults settings					*
2291  *									*
2292  ************************************************************************/
2293 /**
2294  * xmlPedanticParserDefault:
2295  * @val:  int 0 or 1
2296  *
2297  * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2298  *
2299  * Set and return the previous value for enabling pedantic warnings.
2300  *
2301  * Returns the last value for 0 for no substitution, 1 for substitution.
2302  */
2303 
2304 int
xmlPedanticParserDefault(int val)2305 xmlPedanticParserDefault(int val) {
2306     int old = xmlPedanticParserDefaultValue;
2307 
2308     xmlPedanticParserDefaultValue = val;
2309     return(old);
2310 }
2311 
2312 /**
2313  * xmlLineNumbersDefault:
2314  * @val:  int 0 or 1
2315  *
2316  * DEPRECATED: The modern options API always enables line numbers.
2317  *
2318  * Set and return the previous value for enabling line numbers in elements
2319  * contents. This may break on old application and is turned off by default.
2320  *
2321  * Returns the last value for 0 for no substitution, 1 for substitution.
2322  */
2323 
2324 int
xmlLineNumbersDefault(int val)2325 xmlLineNumbersDefault(int val) {
2326     int old = xmlLineNumbersDefaultValue;
2327 
2328     xmlLineNumbersDefaultValue = val;
2329     return(old);
2330 }
2331 
2332 /**
2333  * xmlSubstituteEntitiesDefault:
2334  * @val:  int 0 or 1
2335  *
2336  * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2337  *
2338  * Set and return the previous value for default entity support.
2339  * Initially the parser always keep entity references instead of substituting
2340  * entity values in the output. This function has to be used to change the
2341  * default parser behavior
2342  * SAX::substituteEntities() has to be used for changing that on a file by
2343  * file basis.
2344  *
2345  * Returns the last value for 0 for no substitution, 1 for substitution.
2346  */
2347 
2348 int
xmlSubstituteEntitiesDefault(int val)2349 xmlSubstituteEntitiesDefault(int val) {
2350     int old = xmlSubstituteEntitiesDefaultValue;
2351 
2352     xmlSubstituteEntitiesDefaultValue = val;
2353     return(old);
2354 }
2355 
2356 /**
2357  * xmlKeepBlanksDefault:
2358  * @val:  int 0 or 1
2359  *
2360  * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2361  *
2362  * Set and return the previous value for default blanks text nodes support.
2363  * The 1.x version of the parser used an heuristic to try to detect
2364  * ignorable white spaces. As a result the SAX callback was generating
2365  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2366  * using the DOM output text nodes containing those blanks were not generated.
2367  * The 2.x and later version will switch to the XML standard way and
2368  * ignorableWhitespace() are only generated when running the parser in
2369  * validating mode and when the current element doesn't allow CDATA or
2370  * mixed content.
2371  * This function is provided as a way to force the standard behavior
2372  * on 1.X libs and to switch back to the old mode for compatibility when
2373  * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2374  * by using xmlIsBlankNode() commodity function to detect the "empty"
2375  * nodes generated.
2376  * This value also affect autogeneration of indentation when saving code
2377  * if blanks sections are kept, indentation is not generated.
2378  *
2379  * Returns the last value for 0 for no substitution, 1 for substitution.
2380  */
2381 
2382 int
xmlKeepBlanksDefault(int val)2383 xmlKeepBlanksDefault(int val) {
2384     int old = xmlKeepBlanksDefaultValue;
2385 
2386     xmlKeepBlanksDefaultValue = val;
2387 #ifdef LIBXML_OUTPUT_ENABLED
2388     if (!val)
2389         xmlIndentTreeOutput = 1;
2390 #endif
2391     return(old);
2392 }
2393 
2394