• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * entities.c : implementation for the XML entities handling
3  *
4  * See Copyright for the status of this software.
5  *
6  * daniel@veillard.com
7  */
8 
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #include <string.h>
18 #include <stdlib.h>
19 
20 #include <libxml/xmlmemory.h>
21 #include <libxml/hash.h>
22 #include <libxml/entities.h>
23 #include <libxml/parser.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/xmlerror.h>
26 #include <libxml/dict.h>
27 
28 #include "private/entities.h"
29 #include "private/error.h"
30 
31 /*
32  * The XML predefined entities.
33  */
34 
35 static xmlEntity xmlEntityLt = {
36     NULL, XML_ENTITY_DECL, BAD_CAST "lt",
37     NULL, NULL, NULL, NULL, NULL, NULL,
38     BAD_CAST "<", BAD_CAST "<", 1,
39     XML_INTERNAL_PREDEFINED_ENTITY,
40     NULL, NULL, NULL, NULL, 0, 0, 0
41 };
42 static xmlEntity xmlEntityGt = {
43     NULL, XML_ENTITY_DECL, BAD_CAST "gt",
44     NULL, NULL, NULL, NULL, NULL, NULL,
45     BAD_CAST ">", BAD_CAST ">", 1,
46     XML_INTERNAL_PREDEFINED_ENTITY,
47     NULL, NULL, NULL, NULL, 0, 0, 0
48 };
49 static xmlEntity xmlEntityAmp = {
50     NULL, XML_ENTITY_DECL, BAD_CAST "amp",
51     NULL, NULL, NULL, NULL, NULL, NULL,
52     BAD_CAST "&", BAD_CAST "&", 1,
53     XML_INTERNAL_PREDEFINED_ENTITY,
54     NULL, NULL, NULL, NULL, 0, 0, 0
55 };
56 static xmlEntity xmlEntityQuot = {
57     NULL, XML_ENTITY_DECL, BAD_CAST "quot",
58     NULL, NULL, NULL, NULL, NULL, NULL,
59     BAD_CAST "\"", BAD_CAST "\"", 1,
60     XML_INTERNAL_PREDEFINED_ENTITY,
61     NULL, NULL, NULL, NULL, 0, 0, 0
62 };
63 static xmlEntity xmlEntityApos = {
64     NULL, XML_ENTITY_DECL, BAD_CAST "apos",
65     NULL, NULL, NULL, NULL, NULL, NULL,
66     BAD_CAST "'", BAD_CAST "'", 1,
67     XML_INTERNAL_PREDEFINED_ENTITY,
68     NULL, NULL, NULL, NULL, 0, 0, 0
69 };
70 
71 /**
72  * xmlEntitiesErrMemory:
73  * @extra:  extra information
74  *
75  * Handle an out of memory condition
76  */
77 static void
xmlEntitiesErrMemory(const char * extra)78 xmlEntitiesErrMemory(const char *extra)
79 {
80     __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
81 }
82 
83 /**
84  * xmlEntitiesErr:
85  * @code:  the error code
86  * @msg:  the message
87  *
88  * Raise an error.
89  */
90 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code,const char * msg)91 xmlEntitiesErr(xmlParserErrors code, const char *msg)
92 {
93     __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
94 }
95 
96 /**
97  * xmlEntitiesWarn:
98  * @code:  the error code
99  * @msg:  the message
100  *
101  * Raise a warning.
102  */
103 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesWarn(xmlParserErrors code,const char * msg,const xmlChar * str1)104 xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1)
105 {
106     __xmlRaiseError(NULL, NULL, NULL,
107                 NULL, NULL, XML_FROM_TREE, code,
108                 XML_ERR_WARNING, NULL, 0,
109                 (const char *)str1, NULL, NULL, 0, 0,
110                 msg, (const char *)str1, NULL);
111 }
112 
113 /*
114  * xmlFreeEntity : clean-up an entity record.
115  */
116 void
xmlFreeEntity(xmlEntityPtr entity)117 xmlFreeEntity(xmlEntityPtr entity)
118 {
119     xmlDictPtr dict = NULL;
120 
121     if (entity == NULL)
122         return;
123 
124     if (entity->doc != NULL)
125         dict = entity->doc->dict;
126 
127 
128     if ((entity->children) && (entity->owner == 1) &&
129         (entity == (xmlEntityPtr) entity->children->parent))
130         xmlFreeNodeList(entity->children);
131     if ((entity->name != NULL) &&
132         ((dict == NULL) || (!xmlDictOwns(dict, entity->name))))
133         xmlFree((char *) entity->name);
134     if (entity->ExternalID != NULL)
135         xmlFree((char *) entity->ExternalID);
136     if (entity->SystemID != NULL)
137         xmlFree((char *) entity->SystemID);
138     if (entity->URI != NULL)
139         xmlFree((char *) entity->URI);
140     if (entity->content != NULL)
141         xmlFree((char *) entity->content);
142     if (entity->orig != NULL)
143         xmlFree((char *) entity->orig);
144     xmlFree(entity);
145 }
146 
147 /*
148  * xmlCreateEntity:
149  *
150  * internal routine doing the entity node structures allocations
151  */
152 static xmlEntityPtr
xmlCreateEntity(xmlDictPtr dict,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)153 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
154 	        const xmlChar *ExternalID, const xmlChar *SystemID,
155 	        const xmlChar *content) {
156     xmlEntityPtr ret;
157 
158     ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
159     if (ret == NULL) {
160         xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
161 	return(NULL);
162     }
163     memset(ret, 0, sizeof(xmlEntity));
164     ret->type = XML_ENTITY_DECL;
165 
166     /*
167      * fill the structure.
168      */
169     ret->etype = (xmlEntityType) type;
170     if (dict == NULL) {
171 	ret->name = xmlStrdup(name);
172 	if (ExternalID != NULL)
173 	    ret->ExternalID = xmlStrdup(ExternalID);
174 	if (SystemID != NULL)
175 	    ret->SystemID = xmlStrdup(SystemID);
176     } else {
177         ret->name = xmlDictLookup(dict, name, -1);
178 	ret->ExternalID = xmlStrdup(ExternalID);
179 	ret->SystemID = xmlStrdup(SystemID);
180     }
181     if (content != NULL) {
182         ret->length = xmlStrlen(content);
183 	ret->content = xmlStrndup(content, ret->length);
184      } else {
185         ret->length = 0;
186         ret->content = NULL;
187     }
188     ret->URI = NULL; /* to be computed by the layer knowing
189 			the defining entity */
190     ret->orig = NULL;
191     ret->owner = 0;
192 
193     return(ret);
194 }
195 
196 /*
197  * xmlAddEntity : register a new entity for an entities table.
198  */
199 static xmlEntityPtr
xmlAddEntity(xmlDtdPtr dtd,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)200 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
201 	  const xmlChar *ExternalID, const xmlChar *SystemID,
202 	  const xmlChar *content) {
203     xmlDictPtr dict = NULL;
204     xmlEntitiesTablePtr table = NULL;
205     xmlEntityPtr ret, predef;
206 
207     if (name == NULL)
208 	return(NULL);
209     if (dtd == NULL)
210 	return(NULL);
211     if (dtd->doc != NULL)
212         dict = dtd->doc->dict;
213 
214     switch (type) {
215         case XML_INTERNAL_GENERAL_ENTITY:
216         case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
217         case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
218             predef = xmlGetPredefinedEntity(name);
219             if (predef != NULL) {
220                 int valid = 0;
221 
222                 /* 4.6 Predefined Entities */
223                 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
224                     (content != NULL)) {
225                     int c = predef->content[0];
226 
227                     if (((content[0] == c) && (content[1] == 0)) &&
228                         ((c == '>') || (c == '\'') || (c == '"'))) {
229                         valid = 1;
230                     } else if ((content[0] == '&') && (content[1] == '#')) {
231                         if (content[2] == 'x') {
232                             xmlChar *hex = BAD_CAST "0123456789ABCDEF";
233                             xmlChar ref[] = "00;";
234 
235                             ref[0] = hex[c / 16 % 16];
236                             ref[1] = hex[c % 16];
237                             if (xmlStrcasecmp(&content[3], ref) == 0)
238                                 valid = 1;
239                         } else {
240                             xmlChar ref[] = "00;";
241 
242                             ref[0] = '0' + c / 10 % 10;
243                             ref[1] = '0' + c % 10;
244                             if (xmlStrEqual(&content[2], ref))
245                                 valid = 1;
246                         }
247                     }
248                 }
249                 if (!valid) {
250                     xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING,
251                             "xmlAddEntity: invalid redeclaration of predefined"
252                             " entity '%s'", name);
253                     return(NULL);
254                 }
255             }
256 	    if (dtd->entities == NULL)
257 		dtd->entities = xmlHashCreateDict(0, dict);
258 	    table = dtd->entities;
259 	    break;
260         case XML_INTERNAL_PARAMETER_ENTITY:
261         case XML_EXTERNAL_PARAMETER_ENTITY:
262 	    if (dtd->pentities == NULL)
263 		dtd->pentities = xmlHashCreateDict(0, dict);
264 	    table = dtd->pentities;
265 	    break;
266         case XML_INTERNAL_PREDEFINED_ENTITY:
267 	    return(NULL);
268     }
269     if (table == NULL)
270 	return(NULL);
271     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
272     if (ret == NULL)
273         return(NULL);
274     ret->doc = dtd->doc;
275 
276     if (xmlHashAddEntry(table, name, ret)) {
277 	/*
278 	 * entity was already defined at another level.
279 	 */
280         xmlFreeEntity(ret);
281 	return(NULL);
282     }
283     return(ret);
284 }
285 
286 /**
287  * xmlGetPredefinedEntity:
288  * @name:  the entity name
289  *
290  * Check whether this name is an predefined entity.
291  *
292  * Returns NULL if not, otherwise the entity
293  */
294 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)295 xmlGetPredefinedEntity(const xmlChar *name) {
296     if (name == NULL) return(NULL);
297     switch (name[0]) {
298         case 'l':
299 	    if (xmlStrEqual(name, BAD_CAST "lt"))
300 	        return(&xmlEntityLt);
301 	    break;
302         case 'g':
303 	    if (xmlStrEqual(name, BAD_CAST "gt"))
304 	        return(&xmlEntityGt);
305 	    break;
306         case 'a':
307 	    if (xmlStrEqual(name, BAD_CAST "amp"))
308 	        return(&xmlEntityAmp);
309 	    if (xmlStrEqual(name, BAD_CAST "apos"))
310 	        return(&xmlEntityApos);
311 	    break;
312         case 'q':
313 	    if (xmlStrEqual(name, BAD_CAST "quot"))
314 	        return(&xmlEntityQuot);
315 	    break;
316 	default:
317 	    break;
318     }
319     return(NULL);
320 }
321 
322 /**
323  * xmlAddDtdEntity:
324  * @doc:  the document
325  * @name:  the entity name
326  * @type:  the entity type XML_xxx_yyy_ENTITY
327  * @ExternalID:  the entity external ID if available
328  * @SystemID:  the entity system ID if available
329  * @content:  the entity content
330  *
331  * Register a new entity for this document DTD external subset.
332  *
333  * Returns a pointer to the entity or NULL in case of error
334  */
335 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)336 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
337 	        const xmlChar *ExternalID, const xmlChar *SystemID,
338 		const xmlChar *content) {
339     xmlEntityPtr ret;
340     xmlDtdPtr dtd;
341 
342     if (doc == NULL) {
343 	xmlEntitiesErr(XML_DTD_NO_DOC,
344 	        "xmlAddDtdEntity: document is NULL");
345 	return(NULL);
346     }
347     if (doc->extSubset == NULL) {
348 	xmlEntitiesErr(XML_DTD_NO_DTD,
349 	        "xmlAddDtdEntity: document without external subset");
350 	return(NULL);
351     }
352     dtd = doc->extSubset;
353     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
354     if (ret == NULL) return(NULL);
355 
356     /*
357      * Link it to the DTD
358      */
359     ret->parent = dtd;
360     ret->doc = dtd->doc;
361     if (dtd->last == NULL) {
362 	dtd->children = dtd->last = (xmlNodePtr) ret;
363     } else {
364         dtd->last->next = (xmlNodePtr) ret;
365 	ret->prev = dtd->last;
366 	dtd->last = (xmlNodePtr) ret;
367     }
368     return(ret);
369 }
370 
371 /**
372  * xmlAddDocEntity:
373  * @doc:  the document
374  * @name:  the entity name
375  * @type:  the entity type XML_xxx_yyy_ENTITY
376  * @ExternalID:  the entity external ID if available
377  * @SystemID:  the entity system ID if available
378  * @content:  the entity content
379  *
380  * Register a new entity for this document.
381  *
382  * Returns a pointer to the entity or NULL in case of error
383  */
384 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)385 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
386 	        const xmlChar *ExternalID, const xmlChar *SystemID,
387 	        const xmlChar *content) {
388     xmlEntityPtr ret;
389     xmlDtdPtr dtd;
390 
391     if (doc == NULL) {
392 	xmlEntitiesErr(XML_DTD_NO_DOC,
393 	        "xmlAddDocEntity: document is NULL");
394 	return(NULL);
395     }
396     if (doc->intSubset == NULL) {
397 	xmlEntitiesErr(XML_DTD_NO_DTD,
398 	        "xmlAddDocEntity: document without internal subset");
399 	return(NULL);
400     }
401     dtd = doc->intSubset;
402     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
403     if (ret == NULL) return(NULL);
404 
405     /*
406      * Link it to the DTD
407      */
408     ret->parent = dtd;
409     ret->doc = dtd->doc;
410     if (dtd->last == NULL) {
411 	dtd->children = dtd->last = (xmlNodePtr) ret;
412     } else {
413 	dtd->last->next = (xmlNodePtr) ret;
414 	ret->prev = dtd->last;
415 	dtd->last = (xmlNodePtr) ret;
416     }
417     return(ret);
418 }
419 
420 /**
421  * xmlNewEntity:
422  * @doc:  the document
423  * @name:  the entity name
424  * @type:  the entity type XML_xxx_yyy_ENTITY
425  * @ExternalID:  the entity external ID if available
426  * @SystemID:  the entity system ID if available
427  * @content:  the entity content
428  *
429  * Create a new entity, this differs from xmlAddDocEntity() that if
430  * the document is NULL or has no internal subset defined, then an
431  * unlinked entity structure will be returned, it is then the responsibility
432  * of the caller to link it to the document later or free it when not needed
433  * anymore.
434  *
435  * Returns a pointer to the entity or NULL in case of error
436  */
437 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)438 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
439 	     const xmlChar *ExternalID, const xmlChar *SystemID,
440 	     const xmlChar *content) {
441     xmlEntityPtr ret;
442     xmlDictPtr dict;
443 
444     if ((doc != NULL) && (doc->intSubset != NULL)) {
445 	return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
446     }
447     if (doc != NULL)
448         dict = doc->dict;
449     else
450         dict = NULL;
451     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
452     if (ret == NULL)
453         return(NULL);
454     ret->doc = doc;
455     return(ret);
456 }
457 
458 /**
459  * xmlGetEntityFromTable:
460  * @table:  an entity table
461  * @name:  the entity name
462  * @parameter:  look for parameter entities
463  *
464  * Do an entity lookup in the table.
465  * returns the corresponding parameter entity, if found.
466  *
467  * Returns A pointer to the entity structure or NULL if not found.
468  */
469 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)470 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
471     return((xmlEntityPtr) xmlHashLookup(table, name));
472 }
473 
474 /**
475  * xmlGetParameterEntity:
476  * @doc:  the document referencing the entity
477  * @name:  the entity name
478  *
479  * Do an entity lookup in the internal and external subsets and
480  * returns the corresponding parameter entity, if found.
481  *
482  * Returns A pointer to the entity structure or NULL if not found.
483  */
484 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)485 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
486     xmlEntitiesTablePtr table;
487     xmlEntityPtr ret;
488 
489     if (doc == NULL)
490 	return(NULL);
491     if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
492 	table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
493 	ret = xmlGetEntityFromTable(table, name);
494 	if (ret != NULL)
495 	    return(ret);
496     }
497     if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
498 	table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
499 	return(xmlGetEntityFromTable(table, name));
500     }
501     return(NULL);
502 }
503 
504 /**
505  * xmlGetDtdEntity:
506  * @doc:  the document referencing the entity
507  * @name:  the entity name
508  *
509  * Do an entity lookup in the DTD entity hash table and
510  * returns the corresponding entity, if found.
511  * Note: the first argument is the document node, not the DTD node.
512  *
513  * Returns A pointer to the entity structure or NULL if not found.
514  */
515 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)516 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
517     xmlEntitiesTablePtr table;
518 
519     if (doc == NULL)
520 	return(NULL);
521     if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
522 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
523 	return(xmlGetEntityFromTable(table, name));
524     }
525     return(NULL);
526 }
527 
528 /**
529  * xmlGetDocEntity:
530  * @doc:  the document referencing the entity
531  * @name:  the entity name
532  *
533  * Do an entity lookup in the document entity hash table and
534  * returns the corresponding entity, otherwise a lookup is done
535  * in the predefined entities too.
536  *
537  * Returns A pointer to the entity structure or NULL if not found.
538  */
539 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)540 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
541     xmlEntityPtr cur;
542     xmlEntitiesTablePtr table;
543 
544     if (doc != NULL) {
545 	if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
546 	    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
547 	    cur = xmlGetEntityFromTable(table, name);
548 	    if (cur != NULL)
549 		return(cur);
550 	}
551 	if (doc->standalone != 1) {
552 	    if ((doc->extSubset != NULL) &&
553 		(doc->extSubset->entities != NULL)) {
554 		table = (xmlEntitiesTablePtr) doc->extSubset->entities;
555 		cur = xmlGetEntityFromTable(table, name);
556 		if (cur != NULL)
557 		    return(cur);
558 	    }
559 	}
560     }
561     return(xmlGetPredefinedEntity(name));
562 }
563 
/*
 * Macro used to grow the current buffer.
 *
 * It doubles buffer_size and reallocates buffer, both taken from the
 * scope of the calling function, and jumps to the caller's mem_error
 * label if the doubled size wraps around or the reallocation fails
 * (buffer is left untouched in that case).
 *
 * The body is wrapped in do { } while (0) so that the macro, followed
 * by its semicolon, is a single statement and stays safe inside an
 * unbraced if/else.
 */
#define growBufferReentrant() do {					\
    xmlChar *tmp;                                                       \
    size_t new_size = buffer_size * 2;                                  \
    if (new_size < buffer_size) goto mem_error;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);	                \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;							\
    buffer_size = new_size;						\
} while (0)
576 
577 /**
578  * xmlEncodeEntitiesInternal:
579  * @doc:  the document containing the string
580  * @input:  A string to convert to XML.
581  * @attr: are we handling an attribute value
582  *
583  * Do a global encoding of a string, replacing the predefined entities
584  * and non ASCII values with their entities and CharRef counterparts.
585  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
586  * must be deallocated.
587  *
588  * Returns A newly allocated string with the substitution done.
589  */
590 static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,int attr)591 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
592     const xmlChar *cur = input;
593     xmlChar *buffer = NULL;
594     xmlChar *out = NULL;
595     size_t buffer_size = 0;
596     int html = 0;
597 
598     if (input == NULL) return(NULL);
599     if (doc != NULL)
600         html = (doc->type == XML_HTML_DOCUMENT_NODE);
601 
602     /*
603      * allocate an translation buffer.
604      */
605     buffer_size = 1000;
606     buffer = (xmlChar *) xmlMalloc(buffer_size);
607     if (buffer == NULL) {
608         xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
609 	return(NULL);
610     }
611     out = buffer;
612 
613     while (*cur != '\0') {
614         size_t indx = out - buffer;
615         if (indx + 100 > buffer_size) {
616 
617 	    growBufferReentrant();
618 	    out = &buffer[indx];
619 	}
620 
621 	/*
622 	 * By default one have to encode at least '<', '>', '"' and '&' !
623 	 */
624 	if (*cur == '<') {
625 	    const xmlChar *end;
626 
627 	    /*
628 	     * Special handling of server side include in HTML attributes
629 	     */
630 	    if (html && attr &&
631 	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
632 	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
633 	        while (cur != end) {
634 		    *out++ = *cur++;
635 		    indx = out - buffer;
636 		    if (indx + 100 > buffer_size) {
637 			growBufferReentrant();
638 			out = &buffer[indx];
639 		    }
640 		}
641 		*out++ = *cur++;
642 		*out++ = *cur++;
643 		*out++ = *cur++;
644 		continue;
645 	    }
646 	    *out++ = '&';
647 	    *out++ = 'l';
648 	    *out++ = 't';
649 	    *out++ = ';';
650 	} else if (*cur == '>') {
651 	    *out++ = '&';
652 	    *out++ = 'g';
653 	    *out++ = 't';
654 	    *out++ = ';';
655 	} else if (*cur == '&') {
656 	    /*
657 	     * Special handling of &{...} construct from HTML 4, see
658 	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
659 	     */
660 	    if (html && attr && (cur[1] == '{') &&
661 	        (strchr((const char *) cur, '}'))) {
662 	        while (*cur != '}') {
663 		    *out++ = *cur++;
664 		    indx = out - buffer;
665 		    if (indx + 100 > buffer_size) {
666 			growBufferReentrant();
667 			out = &buffer[indx];
668 		    }
669 		}
670 		*out++ = *cur++;
671 		continue;
672 	    }
673 	    *out++ = '&';
674 	    *out++ = 'a';
675 	    *out++ = 'm';
676 	    *out++ = 'p';
677 	    *out++ = ';';
678 	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
679 	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
680 	    /*
681 	     * default case, just copy !
682 	     */
683 	    *out++ = *cur;
684 	} else if (*cur >= 0x80) {
685 	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
686 		/*
687 		 * Bjørn Reese <br@sseusa.com> provided the patch
688 	        xmlChar xc;
689 	        xc = (*cur & 0x3F) << 6;
690 	        if (cur[1] != 0) {
691 		    xc += *(++cur) & 0x3F;
692 		    *out++ = xc;
693 	        } else
694 		 */
695 		*out++ = *cur;
696 	    } else {
697 		/*
698 		 * We assume we have UTF-8 input.
699 		 * It must match either:
700 		 *   110xxxxx 10xxxxxx
701 		 *   1110xxxx 10xxxxxx 10xxxxxx
702 		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
703 		 * That is:
704 		 *   cur[0] is 11xxxxxx
705 		 *   cur[1] is 10xxxxxx
706 		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
707 		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
708 		 *   cur[0] is not 11111xxx
709 		 */
710 		char buf[11], *ptr;
711 		int val = 0, l = 1;
712 
713 		if (((cur[0] & 0xC0) != 0xC0) ||
714 		    ((cur[1] & 0xC0) != 0x80) ||
715 		    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
716 		    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
717 		    (((cur[0] & 0xF8) == 0xF8))) {
718 		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
719 			    "xmlEncodeEntities: input not UTF-8");
720 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
721 		    buf[sizeof(buf) - 1] = 0;
722 		    ptr = buf;
723 		    while (*ptr != 0) *out++ = *ptr++;
724 		    cur++;
725 		    continue;
726 		} else if (*cur < 0xE0) {
727                     val = (cur[0]) & 0x1F;
728 		    val <<= 6;
729 		    val |= (cur[1]) & 0x3F;
730 		    l = 2;
731 		} else if (*cur < 0xF0) {
732                     val = (cur[0]) & 0x0F;
733 		    val <<= 6;
734 		    val |= (cur[1]) & 0x3F;
735 		    val <<= 6;
736 		    val |= (cur[2]) & 0x3F;
737 		    l = 3;
738 		} else if (*cur < 0xF8) {
739                     val = (cur[0]) & 0x07;
740 		    val <<= 6;
741 		    val |= (cur[1]) & 0x3F;
742 		    val <<= 6;
743 		    val |= (cur[2]) & 0x3F;
744 		    val <<= 6;
745 		    val |= (cur[3]) & 0x3F;
746 		    l = 4;
747 		}
748 		if ((l == 1) || (!IS_CHAR(val))) {
749 		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
750 			"xmlEncodeEntities: char out of range\n");
751 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
752 		    buf[sizeof(buf) - 1] = 0;
753 		    ptr = buf;
754 		    while (*ptr != 0) *out++ = *ptr++;
755 		    cur++;
756 		    continue;
757 		}
758 		/*
759 		 * We could do multiple things here. Just save as a char ref
760 		 */
761 		snprintf(buf, sizeof(buf), "&#x%X;", val);
762 		buf[sizeof(buf) - 1] = 0;
763 		ptr = buf;
764 		while (*ptr != 0) *out++ = *ptr++;
765 		cur += l;
766 		continue;
767 	    }
768 	} else if (IS_BYTE_CHAR(*cur)) {
769 	    char buf[11], *ptr;
770 
771 	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
772 	    buf[sizeof(buf) - 1] = 0;
773             ptr = buf;
774 	    while (*ptr != 0) *out++ = *ptr++;
775 	}
776 	cur++;
777     }
778     *out = 0;
779     return(buffer);
780 
781 mem_error:
782     xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
783     xmlFree(buffer);
784     return(NULL);
785 }
786 
787 /**
788  * xmlEncodeAttributeEntities:
789  * @doc:  the document containing the string
790  * @input:  A string to convert to XML.
791  *
792  * Do a global encoding of a string, replacing the predefined entities
793  * and non ASCII values with their entities and CharRef counterparts for
794  * attribute values.
795  *
796  * Returns A newly allocated string with the substitution done.
797  */
798 xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc,const xmlChar * input)799 xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
800     return xmlEncodeEntitiesInternal(doc, input, 1);
801 }
802 
803 /**
804  * xmlEncodeEntitiesReentrant:
805  * @doc:  the document containing the string
806  * @input:  A string to convert to XML.
807  *
808  * Do a global encoding of a string, replacing the predefined entities
809  * and non ASCII values with their entities and CharRef counterparts.
810  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
811  * must be deallocated.
812  *
813  * Returns A newly allocated string with the substitution done.
814  */
815 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)816 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
817     return xmlEncodeEntitiesInternal(doc, input, 0);
818 }
819 
820 /**
821  * xmlEncodeSpecialChars:
822  * @doc:  the document containing the string
823  * @input:  A string to convert to XML.
824  *
825  * Do a global encoding of a string, replacing the predefined entities
826  * this routine is reentrant, and result must be deallocated.
827  *
828  * Returns A newly allocated string with the substitution done.
829  */
830 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)831 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
832     const xmlChar *cur = input;
833     xmlChar *buffer = NULL;
834     xmlChar *out = NULL;
835     size_t buffer_size = 0;
836     if (input == NULL) return(NULL);
837 
838     /*
839      * allocate an translation buffer.
840      */
841     buffer_size = 1000;
842     buffer = (xmlChar *) xmlMalloc(buffer_size);
843     if (buffer == NULL) {
844         xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
845 	return(NULL);
846     }
847     out = buffer;
848 
849     while (*cur != '\0') {
850         size_t indx = out - buffer;
851         if (indx + 10 > buffer_size) {
852 
853 	    growBufferReentrant();
854 	    out = &buffer[indx];
855 	}
856 
857 	/*
858 	 * By default one have to encode at least '<', '>', '"' and '&' !
859 	 */
860 	if (*cur == '<') {
861 	    *out++ = '&';
862 	    *out++ = 'l';
863 	    *out++ = 't';
864 	    *out++ = ';';
865 	} else if (*cur == '>') {
866 	    *out++ = '&';
867 	    *out++ = 'g';
868 	    *out++ = 't';
869 	    *out++ = ';';
870 	} else if (*cur == '&') {
871 	    *out++ = '&';
872 	    *out++ = 'a';
873 	    *out++ = 'm';
874 	    *out++ = 'p';
875 	    *out++ = ';';
876 	} else if (*cur == '"') {
877 	    *out++ = '&';
878 	    *out++ = 'q';
879 	    *out++ = 'u';
880 	    *out++ = 'o';
881 	    *out++ = 't';
882 	    *out++ = ';';
883 	} else if (*cur == '\r') {
884 	    *out++ = '&';
885 	    *out++ = '#';
886 	    *out++ = '1';
887 	    *out++ = '3';
888 	    *out++ = ';';
889 	} else {
890 	    /*
891 	     * Works because on UTF-8, all extended sequences cannot
892 	     * result in bytes in the ASCII range.
893 	     */
894 	    *out++ = *cur;
895 	}
896 	cur++;
897     }
898     *out = 0;
899     return(buffer);
900 
901 mem_error:
902     xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
903     xmlFree(buffer);
904     return(NULL);
905 }
906 
907 /**
908  * xmlCreateEntitiesTable:
909  *
910  * create and initialize an empty entities hash table.
911  * This really doesn't make sense and should be deprecated
912  *
913  * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
914  */
915 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)916 xmlCreateEntitiesTable(void) {
917     return((xmlEntitiesTablePtr) xmlHashCreate(0));
918 }
919 
920 /**
921  * xmlFreeEntityWrapper:
922  * @entity:  An entity
923  * @name:  its name
924  *
925  * Deallocate the memory used by an entities in the hash table.
926  */
927 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)928 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
929     if (entity != NULL)
930 	xmlFreeEntity((xmlEntityPtr) entity);
931 }
932 
933 /**
934  * xmlFreeEntitiesTable:
935  * @table:  An entity table
936  *
937  * Deallocate the memory used by an entities hash table.
938  */
939 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)940 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
941     xmlHashFree(table, xmlFreeEntityWrapper);
942 }
943 
944 #ifdef LIBXML_TREE_ENABLED
945 /**
946  * xmlCopyEntity:
947  * @ent:  An entity
948  *
949  * Build a copy of an entity
950  *
951  * Returns the new xmlEntitiesPtr or NULL in case of error.
952  */
953 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)954 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
955     xmlEntityPtr ent = (xmlEntityPtr) payload;
956     xmlEntityPtr cur;
957 
958     cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
959     if (cur == NULL) {
960         xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
961 	return(NULL);
962     }
963     memset(cur, 0, sizeof(xmlEntity));
964     cur->type = XML_ENTITY_DECL;
965 
966     cur->etype = ent->etype;
967     if (ent->name != NULL)
968 	cur->name = xmlStrdup(ent->name);
969     if (ent->ExternalID != NULL)
970 	cur->ExternalID = xmlStrdup(ent->ExternalID);
971     if (ent->SystemID != NULL)
972 	cur->SystemID = xmlStrdup(ent->SystemID);
973     if (ent->content != NULL)
974 	cur->content = xmlStrdup(ent->content);
975     if (ent->orig != NULL)
976 	cur->orig = xmlStrdup(ent->orig);
977     if (ent->URI != NULL)
978 	cur->URI = xmlStrdup(ent->URI);
979     return(cur);
980 }
981 
982 /**
983  * xmlCopyEntitiesTable:
984  * @table:  An entity table
985  *
986  * Build a copy of an entity table.
987  *
988  * Returns the new xmlEntitiesTablePtr or NULL in case of error.
989  */
990 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)991 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
992     return(xmlHashCopy(table, xmlCopyEntity));
993 }
994 #endif /* LIBXML_TREE_ENABLED */
995 
996 #ifdef LIBXML_OUTPUT_ENABLED
997 
998 /**
999  * xmlDumpEntityContent:
1000  * @buf:  An XML buffer.
1001  * @content:  The entity content.
1002  *
1003  * This will dump the quoted string value, taking care of the special
1004  * treatment required by %
1005  */
1006 static void
xmlDumpEntityContent(xmlBufferPtr buf,const xmlChar * content)1007 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1008     if (xmlStrchr(content, '%')) {
1009         const xmlChar * base, *cur;
1010 
1011 	xmlBufferCCat(buf, "\"");
1012 	base = cur = content;
1013 	while (*cur != 0) {
1014 	    if (*cur == '"') {
1015 		if (base != cur)
1016 		    xmlBufferAdd(buf, base, cur - base);
1017 		xmlBufferAdd(buf, BAD_CAST "&quot;", 6);
1018 		cur++;
1019 		base = cur;
1020 	    } else if (*cur == '%') {
1021 		if (base != cur)
1022 		    xmlBufferAdd(buf, base, cur - base);
1023 		xmlBufferAdd(buf, BAD_CAST "&#x25;", 6);
1024 		cur++;
1025 		base = cur;
1026 	    } else {
1027 		cur++;
1028 	    }
1029 	}
1030 	if (base != cur)
1031 	    xmlBufferAdd(buf, base, cur - base);
1032 	xmlBufferCCat(buf, "\"");
1033     } else {
1034         xmlBufferWriteQuotedString(buf, content);
1035     }
1036 }
1037 
1038 /**
1039  * xmlDumpEntityDecl:
1040  * @buf:  An XML buffer.
1041  * @ent:  An entity table
1042  *
1043  * This will dump the content of the entity table as an XML DTD definition
1044  */
1045 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)1046 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1047     if ((buf == NULL) || (ent == NULL)) return;
1048     switch (ent->etype) {
1049 	case XML_INTERNAL_GENERAL_ENTITY:
1050 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1051 	    xmlBufferWriteCHAR(buf, ent->name);
1052 	    xmlBufferWriteChar(buf, " ");
1053 	    if (ent->orig != NULL)
1054 		xmlBufferWriteQuotedString(buf, ent->orig);
1055 	    else
1056 		xmlDumpEntityContent(buf, ent->content);
1057 	    xmlBufferWriteChar(buf, ">\n");
1058 	    break;
1059 	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1060 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1061 	    xmlBufferWriteCHAR(buf, ent->name);
1062 	    if (ent->ExternalID != NULL) {
1063 		 xmlBufferWriteChar(buf, " PUBLIC ");
1064 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1065 		 xmlBufferWriteChar(buf, " ");
1066 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1067 	    } else {
1068 		 xmlBufferWriteChar(buf, " SYSTEM ");
1069 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1070 	    }
1071 	    xmlBufferWriteChar(buf, ">\n");
1072 	    break;
1073 	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1074 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1075 	    xmlBufferWriteCHAR(buf, ent->name);
1076 	    if (ent->ExternalID != NULL) {
1077 		 xmlBufferWriteChar(buf, " PUBLIC ");
1078 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1079 		 xmlBufferWriteChar(buf, " ");
1080 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1081 	    } else {
1082 		 xmlBufferWriteChar(buf, " SYSTEM ");
1083 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1084 	    }
1085 	    if (ent->content != NULL) { /* Should be true ! */
1086 		xmlBufferWriteChar(buf, " NDATA ");
1087 		if (ent->orig != NULL)
1088 		    xmlBufferWriteCHAR(buf, ent->orig);
1089 		else
1090 		    xmlBufferWriteCHAR(buf, ent->content);
1091 	    }
1092 	    xmlBufferWriteChar(buf, ">\n");
1093 	    break;
1094 	case XML_INTERNAL_PARAMETER_ENTITY:
1095 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1096 	    xmlBufferWriteCHAR(buf, ent->name);
1097 	    xmlBufferWriteChar(buf, " ");
1098 	    if (ent->orig == NULL)
1099 		xmlDumpEntityContent(buf, ent->content);
1100 	    else
1101 		xmlBufferWriteQuotedString(buf, ent->orig);
1102 	    xmlBufferWriteChar(buf, ">\n");
1103 	    break;
1104 	case XML_EXTERNAL_PARAMETER_ENTITY:
1105 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1106 	    xmlBufferWriteCHAR(buf, ent->name);
1107 	    if (ent->ExternalID != NULL) {
1108 		 xmlBufferWriteChar(buf, " PUBLIC ");
1109 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1110 		 xmlBufferWriteChar(buf, " ");
1111 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1112 	    } else {
1113 		 xmlBufferWriteChar(buf, " SYSTEM ");
1114 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1115 	    }
1116 	    xmlBufferWriteChar(buf, ">\n");
1117 	    break;
1118 	default:
1119 	    xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1120 		"xmlDumpEntitiesDecl: internal: unknown type entity type");
1121     }
1122 }
1123 
1124 /**
1125  * xmlDumpEntityDeclScan:
1126  * @ent:  An entity table
1127  * @buf:  An XML buffer.
1128  *
1129  * When using the hash table scan function, arguments need to be reversed
1130  */
1131 static void
xmlDumpEntityDeclScan(void * ent,void * buf,const xmlChar * name ATTRIBUTE_UNUSED)1132 xmlDumpEntityDeclScan(void *ent, void *buf,
1133                       const xmlChar *name ATTRIBUTE_UNUSED) {
1134     xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
1135 }
1136 
1137 /**
1138  * xmlDumpEntitiesTable:
1139  * @buf:  An XML buffer.
1140  * @table:  An entity table
1141  *
1142  * This will dump the content of the entity table as an XML DTD definition
1143  */
1144 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)1145 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1146     xmlHashScan(table, xmlDumpEntityDeclScan, buf);
1147 }
1148 #endif /* LIBXML_OUTPUT_ENABLED */
1149