• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * entities.c : implementation for the XML entities handling
3  *
4  * See Copyright for the status of this software.
5  *
6  * daniel@veillard.com
7  */
8 
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #include <string.h>
18 #include <stdlib.h>
19 
20 #include <libxml/xmlmemory.h>
21 #include <libxml/hash.h>
22 #include <libxml/entities.h>
23 #include <libxml/parser.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/xmlerror.h>
26 #include <libxml/dict.h>
27 #include <libxml/xmlsave.h>
28 
29 #include "private/entities.h"
30 #include "private/error.h"
31 
32 /*
33  * The XML predefined entities.
34  */
35 
36 static xmlEntity xmlEntityLt = {
37     NULL, XML_ENTITY_DECL, BAD_CAST "lt",
38     NULL, NULL, NULL, NULL, NULL, NULL,
39     BAD_CAST "<", BAD_CAST "<", 1,
40     XML_INTERNAL_PREDEFINED_ENTITY,
41     NULL, NULL, NULL, NULL, 0, 0, 0
42 };
43 static xmlEntity xmlEntityGt = {
44     NULL, XML_ENTITY_DECL, BAD_CAST "gt",
45     NULL, NULL, NULL, NULL, NULL, NULL,
46     BAD_CAST ">", BAD_CAST ">", 1,
47     XML_INTERNAL_PREDEFINED_ENTITY,
48     NULL, NULL, NULL, NULL, 0, 0, 0
49 };
50 static xmlEntity xmlEntityAmp = {
51     NULL, XML_ENTITY_DECL, BAD_CAST "amp",
52     NULL, NULL, NULL, NULL, NULL, NULL,
53     BAD_CAST "&", BAD_CAST "&", 1,
54     XML_INTERNAL_PREDEFINED_ENTITY,
55     NULL, NULL, NULL, NULL, 0, 0, 0
56 };
57 static xmlEntity xmlEntityQuot = {
58     NULL, XML_ENTITY_DECL, BAD_CAST "quot",
59     NULL, NULL, NULL, NULL, NULL, NULL,
60     BAD_CAST "\"", BAD_CAST "\"", 1,
61     XML_INTERNAL_PREDEFINED_ENTITY,
62     NULL, NULL, NULL, NULL, 0, 0, 0
63 };
64 static xmlEntity xmlEntityApos = {
65     NULL, XML_ENTITY_DECL, BAD_CAST "apos",
66     NULL, NULL, NULL, NULL, NULL, NULL,
67     BAD_CAST "'", BAD_CAST "'", 1,
68     XML_INTERNAL_PREDEFINED_ENTITY,
69     NULL, NULL, NULL, NULL, 0, 0, 0
70 };
71 
72 /*
73  * xmlFreeEntity:
74  * @entity:  an entity
75  *
76  * Frees the entity.
77  */
78 void
xmlFreeEntity(xmlEntityPtr entity)79 xmlFreeEntity(xmlEntityPtr entity)
80 {
81     xmlDictPtr dict = NULL;
82 
83     if (entity == NULL)
84         return;
85 
86     if (entity->doc != NULL)
87         dict = entity->doc->dict;
88 
89 
90     if ((entity->children) &&
91         (entity == (xmlEntityPtr) entity->children->parent))
92         xmlFreeNodeList(entity->children);
93     if ((entity->name != NULL) &&
94         ((dict == NULL) || (!xmlDictOwns(dict, entity->name))))
95         xmlFree((char *) entity->name);
96     if (entity->ExternalID != NULL)
97         xmlFree((char *) entity->ExternalID);
98     if (entity->SystemID != NULL)
99         xmlFree((char *) entity->SystemID);
100     if (entity->URI != NULL)
101         xmlFree((char *) entity->URI);
102     if (entity->content != NULL)
103         xmlFree((char *) entity->content);
104     if (entity->orig != NULL)
105         xmlFree((char *) entity->orig);
106     xmlFree(entity);
107 }
108 
109 /*
110  * xmlCreateEntity:
111  *
112  * internal routine doing the entity node structures allocations
113  */
114 static xmlEntityPtr
xmlCreateEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)115 xmlCreateEntity(xmlDocPtr doc, const xmlChar *name, int type,
116 	        const xmlChar *ExternalID, const xmlChar *SystemID,
117 	        const xmlChar *content) {
118     xmlEntityPtr ret;
119 
120     ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
121     if (ret == NULL)
122 	return(NULL);
123     memset(ret, 0, sizeof(xmlEntity));
124     ret->doc = doc;
125     ret->type = XML_ENTITY_DECL;
126 
127     /*
128      * fill the structure.
129      */
130     ret->etype = (xmlEntityType) type;
131     if ((doc == NULL) || (doc->dict == NULL))
132 	ret->name = xmlStrdup(name);
133     else
134         ret->name = xmlDictLookup(doc->dict, name, -1);
135     if (ret->name == NULL)
136         goto error;
137     if (ExternalID != NULL) {
138         ret->ExternalID = xmlStrdup(ExternalID);
139         if (ret->ExternalID == NULL)
140             goto error;
141     }
142     if (SystemID != NULL) {
143         ret->SystemID = xmlStrdup(SystemID);
144         if (ret->SystemID == NULL)
145             goto error;
146     }
147     if (content != NULL) {
148         ret->length = xmlStrlen(content);
149 	ret->content = xmlStrndup(content, ret->length);
150         if (ret->content == NULL)
151             goto error;
152      } else {
153         ret->length = 0;
154         ret->content = NULL;
155     }
156     ret->URI = NULL; /* to be computed by the layer knowing
157 			the defining entity */
158     ret->orig = NULL;
159 
160     return(ret);
161 
162 error:
163     xmlFreeEntity(ret);
164     return(NULL);
165 }
166 
167 /**
168  * xmlAddEntity:
169  * @doc:  the document
170  * @extSubset:  add to the external or internal subset
171  * @name:  the entity name
172  * @type:  the entity type XML_xxx_yyy_ENTITY
173  * @ExternalID:  the entity external ID if available
174  * @SystemID:  the entity system ID if available
175  * @content:  the entity content
176  * @out:  pointer to resulting entity (optional)
177  *
178  * Register a new entity for this document.
179  *
180  * Available since 2.13.0.
181  *
182  * Returns an xmlParserErrors error code.
183  */
184 int
xmlAddEntity(xmlDocPtr doc,int extSubset,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content,xmlEntityPtr * out)185 xmlAddEntity(xmlDocPtr doc, int extSubset, const xmlChar *name, int type,
186 	  const xmlChar *ExternalID, const xmlChar *SystemID,
187 	  const xmlChar *content, xmlEntityPtr *out) {
188     xmlDtdPtr dtd;
189     xmlDictPtr dict = NULL;
190     xmlEntitiesTablePtr table = NULL;
191     xmlEntityPtr ret, predef;
192     int res;
193 
194     if (out != NULL)
195         *out = NULL;
196     if ((doc == NULL) || (name == NULL))
197 	return(XML_ERR_ARGUMENT);
198     dict = doc->dict;
199 
200     if (extSubset)
201         dtd = doc->extSubset;
202     else
203         dtd = doc->intSubset;
204     if (dtd == NULL)
205         return(XML_DTD_NO_DTD);
206 
207     switch (type) {
208         case XML_INTERNAL_GENERAL_ENTITY:
209         case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
210         case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
211             predef = xmlGetPredefinedEntity(name);
212             if (predef != NULL) {
213                 int valid = 0;
214 
215                 /* 4.6 Predefined Entities */
216                 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
217                     (content != NULL)) {
218                     int c = predef->content[0];
219 
220                     if (((content[0] == c) && (content[1] == 0)) &&
221                         ((c == '>') || (c == '\'') || (c == '"'))) {
222                         valid = 1;
223                     } else if ((content[0] == '&') && (content[1] == '#')) {
224                         if (content[2] == 'x') {
225                             xmlChar *hex = BAD_CAST "0123456789ABCDEF";
226                             xmlChar ref[] = "00;";
227 
228                             ref[0] = hex[c / 16 % 16];
229                             ref[1] = hex[c % 16];
230                             if (xmlStrcasecmp(&content[3], ref) == 0)
231                                 valid = 1;
232                         } else {
233                             xmlChar ref[] = "00;";
234 
235                             ref[0] = '0' + c / 10 % 10;
236                             ref[1] = '0' + c % 10;
237                             if (xmlStrEqual(&content[2], ref))
238                                 valid = 1;
239                         }
240                     }
241                 }
242                 if (!valid)
243                     return(XML_ERR_REDECL_PREDEF_ENTITY);
244             }
245 	    if (dtd->entities == NULL) {
246 		dtd->entities = xmlHashCreateDict(0, dict);
247                 if (dtd->entities == NULL)
248                     return(XML_ERR_NO_MEMORY);
249             }
250 	    table = dtd->entities;
251 	    break;
252         case XML_INTERNAL_PARAMETER_ENTITY:
253         case XML_EXTERNAL_PARAMETER_ENTITY:
254 	    if (dtd->pentities == NULL) {
255 		dtd->pentities = xmlHashCreateDict(0, dict);
256                 if (dtd->pentities == NULL)
257                     return(XML_ERR_NO_MEMORY);
258             }
259 	    table = dtd->pentities;
260 	    break;
261         default:
262 	    return(XML_ERR_ARGUMENT);
263     }
264     ret = xmlCreateEntity(dtd->doc, name, type, ExternalID, SystemID, content);
265     if (ret == NULL)
266         return(XML_ERR_NO_MEMORY);
267 
268     res = xmlHashAdd(table, name, ret);
269     if (res < 0) {
270         xmlFreeEntity(ret);
271         return(XML_ERR_NO_MEMORY);
272     } else if (res == 0) {
273 	/*
274 	 * entity was already defined at another level.
275 	 */
276         xmlFreeEntity(ret);
277 	return(XML_WAR_ENTITY_REDEFINED);
278     }
279 
280     /*
281      * Link it to the DTD
282      */
283     ret->parent = dtd;
284     ret->doc = dtd->doc;
285     if (dtd->last == NULL) {
286 	dtd->children = dtd->last = (xmlNodePtr) ret;
287     } else {
288 	dtd->last->next = (xmlNodePtr) ret;
289 	ret->prev = dtd->last;
290 	dtd->last = (xmlNodePtr) ret;
291     }
292 
293     if (out != NULL)
294         *out = ret;
295     return(0);
296 }
297 
298 /**
299  * xmlGetPredefinedEntity:
300  * @name:  the entity name
301  *
302  * Check whether this name is an predefined entity.
303  *
304  * Returns NULL if not, otherwise the entity
305  */
306 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)307 xmlGetPredefinedEntity(const xmlChar *name) {
308     if (name == NULL) return(NULL);
309     switch (name[0]) {
310         case 'l':
311 	    if (xmlStrEqual(name, BAD_CAST "lt"))
312 	        return(&xmlEntityLt);
313 	    break;
314         case 'g':
315 	    if (xmlStrEqual(name, BAD_CAST "gt"))
316 	        return(&xmlEntityGt);
317 	    break;
318         case 'a':
319 	    if (xmlStrEqual(name, BAD_CAST "amp"))
320 	        return(&xmlEntityAmp);
321 	    if (xmlStrEqual(name, BAD_CAST "apos"))
322 	        return(&xmlEntityApos);
323 	    break;
324         case 'q':
325 	    if (xmlStrEqual(name, BAD_CAST "quot"))
326 	        return(&xmlEntityQuot);
327 	    break;
328 	default:
329 	    break;
330     }
331     return(NULL);
332 }
333 
334 /**
335  * xmlAddDtdEntity:
336  * @doc:  the document
337  * @name:  the entity name
338  * @type:  the entity type XML_xxx_yyy_ENTITY
339  * @ExternalID:  the entity external ID if available
340  * @SystemID:  the entity system ID if available
341  * @content:  the entity content
342  *
343  * Register a new entity for this document DTD external subset.
344  *
345  * Returns a pointer to the entity or NULL in case of error
346  */
347 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)348 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
349 	        const xmlChar *ExternalID, const xmlChar *SystemID,
350 		const xmlChar *content) {
351     xmlEntityPtr ret;
352 
353     xmlAddEntity(doc, 1, name, type, ExternalID, SystemID, content, &ret);
354     return(ret);
355 }
356 
357 /**
358  * xmlAddDocEntity:
359  * @doc:  the document
360  * @name:  the entity name
361  * @type:  the entity type XML_xxx_yyy_ENTITY
362  * @ExternalID:  the entity external ID if available
363  * @SystemID:  the entity system ID if available
364  * @content:  the entity content
365  *
366  * Register a new entity for this document.
367  *
368  * Returns a pointer to the entity or NULL in case of error
369  */
370 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)371 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
372 	        const xmlChar *ExternalID, const xmlChar *SystemID,
373 	        const xmlChar *content) {
374     xmlEntityPtr ret;
375 
376     xmlAddEntity(doc, 0, name, type, ExternalID, SystemID, content, &ret);
377     return(ret);
378 }
379 
380 /**
381  * xmlNewEntity:
382  * @doc:  the document
383  * @name:  the entity name
384  * @type:  the entity type XML_xxx_yyy_ENTITY
385  * @ExternalID:  the entity external ID if available
386  * @SystemID:  the entity system ID if available
387  * @content:  the entity content
388  *
389  * Create a new entity, this differs from xmlAddDocEntity() that if
390  * the document is NULL or has no internal subset defined, then an
391  * unlinked entity structure will be returned, it is then the responsibility
392  * of the caller to link it to the document later or free it when not needed
393  * anymore.
394  *
395  * Returns a pointer to the entity or NULL in case of error
396  */
397 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)398 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
399 	     const xmlChar *ExternalID, const xmlChar *SystemID,
400 	     const xmlChar *content) {
401     if ((doc != NULL) && (doc->intSubset != NULL)) {
402 	return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
403     }
404     if (name == NULL)
405         return(NULL);
406     return(xmlCreateEntity(doc, name, type, ExternalID, SystemID, content));
407 }
408 
409 /**
410  * xmlGetEntityFromTable:
411  * @table:  an entity table
412  * @name:  the entity name
413  * @parameter:  look for parameter entities
414  *
415  * Do an entity lookup in the table.
416  * returns the corresponding parameter entity, if found.
417  *
418  * Returns A pointer to the entity structure or NULL if not found.
419  */
420 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)421 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
422     return((xmlEntityPtr) xmlHashLookup(table, name));
423 }
424 
425 /**
426  * xmlGetParameterEntity:
427  * @doc:  the document referencing the entity
428  * @name:  the entity name
429  *
430  * Do an entity lookup in the internal and external subsets and
431  * returns the corresponding parameter entity, if found.
432  *
433  * Returns A pointer to the entity structure or NULL if not found.
434  */
435 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)436 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
437     xmlEntitiesTablePtr table;
438     xmlEntityPtr ret;
439 
440     if (doc == NULL)
441 	return(NULL);
442     if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
443 	table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
444 	ret = xmlGetEntityFromTable(table, name);
445 	if (ret != NULL)
446 	    return(ret);
447     }
448     if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
449 	table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
450 	return(xmlGetEntityFromTable(table, name));
451     }
452     return(NULL);
453 }
454 
455 /**
456  * xmlGetDtdEntity:
457  * @doc:  the document referencing the entity
458  * @name:  the entity name
459  *
460  * Do an entity lookup in the DTD entity hash table and
461  * returns the corresponding entity, if found.
462  * Note: the first argument is the document node, not the DTD node.
463  *
464  * Returns A pointer to the entity structure or NULL if not found.
465  */
466 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)467 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
468     xmlEntitiesTablePtr table;
469 
470     if (doc == NULL)
471 	return(NULL);
472     if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
473 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
474 	return(xmlGetEntityFromTable(table, name));
475     }
476     return(NULL);
477 }
478 
479 /**
480  * xmlGetDocEntity:
481  * @doc:  the document referencing the entity
482  * @name:  the entity name
483  *
484  * Do an entity lookup in the document entity hash table and
485  * returns the corresponding entity, otherwise a lookup is done
486  * in the predefined entities too.
487  *
488  * Returns A pointer to the entity structure or NULL if not found.
489  */
490 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)491 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
492     xmlEntityPtr cur;
493     xmlEntitiesTablePtr table;
494 
495     if (doc != NULL) {
496 	if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
497 	    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
498 	    cur = xmlGetEntityFromTable(table, name);
499 	    if (cur != NULL)
500 		return(cur);
501 	}
502 	if (doc->standalone != 1) {
503 	    if ((doc->extSubset != NULL) &&
504 		(doc->extSubset->entities != NULL)) {
505 		table = (xmlEntitiesTablePtr) doc->extSubset->entities;
506 		cur = xmlGetEntityFromTable(table, name);
507 		if (cur != NULL)
508 		    return(cur);
509 	    }
510 	}
511     }
512     return(xmlGetPredefinedEntity(name));
513 }
514 
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation behind the enclosing function's "buffer" and
 * updates "buffer_size". Jumps to the enclosing function's "mem_error"
 * label if the doubled size overflows or the reallocation fails; in
 * that case "buffer" still points to the old allocation. The buffer may
 * move, so callers must recompute any pointer into it afterwards.
 *
 * Wrapped in do { } while (0) so that "growBufferReentrant();" is a
 * single statement and stays valid inside an unbraced if/else.
 */
#define growBufferReentrant() do {					\
    xmlChar *tmp;                                                       \
    size_t new_size = buffer_size * 2;                                  \
    if (new_size < buffer_size) goto mem_error;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);	                \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;							\
    buffer_size = new_size;						\
} while (0)
527 
528 /**
529  * xmlEncodeEntitiesInternal:
530  * @doc:  the document containing the string
531  * @input:  A string to convert to XML.
532  * @attr: are we handling an attribute value
533  *
534  * Do a global encoding of a string, replacing the predefined entities
535  * and non ASCII values with their entities and CharRef counterparts.
536  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
537  * must be deallocated.
538  *
539  * Returns A newly allocated string with the substitution done.
540  */
541 static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,int attr)542 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
543     const xmlChar *cur = input;
544     xmlChar *buffer = NULL;
545     xmlChar *out = NULL;
546     size_t buffer_size = 0;
547     int html = 0;
548 
549     if (input == NULL) return(NULL);
550     if (doc != NULL)
551         html = (doc->type == XML_HTML_DOCUMENT_NODE);
552 
553     /*
554      * allocate an translation buffer.
555      */
556     buffer_size = 1000;
557     buffer = (xmlChar *) xmlMalloc(buffer_size);
558     if (buffer == NULL)
559 	return(NULL);
560     out = buffer;
561 
562     while (*cur != '\0') {
563         size_t indx = out - buffer;
564         if (indx + 100 > buffer_size) {
565 
566 	    growBufferReentrant();
567 	    out = &buffer[indx];
568 	}
569 
570 	/*
571 	 * By default one have to encode at least '<', '>', '"' and '&' !
572 	 */
573 	if (*cur == '<') {
574 	    const xmlChar *end;
575 
576 	    /*
577 	     * Special handling of server side include in HTML attributes
578 	     */
579 	    if (html && attr &&
580 	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
581 	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
582 	        while (cur != end) {
583 		    *out++ = *cur++;
584 		    indx = out - buffer;
585 		    if (indx + 100 > buffer_size) {
586 			growBufferReentrant();
587 			out = &buffer[indx];
588 		    }
589 		}
590 		*out++ = *cur++;
591 		*out++ = *cur++;
592 		*out++ = *cur++;
593 		continue;
594 	    }
595 	    *out++ = '&';
596 	    *out++ = 'l';
597 	    *out++ = 't';
598 	    *out++ = ';';
599 	} else if (*cur == '>') {
600 	    *out++ = '&';
601 	    *out++ = 'g';
602 	    *out++ = 't';
603 	    *out++ = ';';
604 	} else if (*cur == '&') {
605 	    /*
606 	     * Special handling of &{...} construct from HTML 4, see
607 	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
608 	     */
609 	    if (html && attr && (cur[1] == '{') &&
610 	        (strchr((const char *) cur, '}'))) {
611 	        while (*cur != '}') {
612 		    *out++ = *cur++;
613 		    indx = out - buffer;
614 		    if (indx + 100 > buffer_size) {
615 			growBufferReentrant();
616 			out = &buffer[indx];
617 		    }
618 		}
619 		*out++ = *cur++;
620 		continue;
621 	    }
622 	    *out++ = '&';
623 	    *out++ = 'a';
624 	    *out++ = 'm';
625 	    *out++ = 'p';
626 	    *out++ = ';';
627 	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
628 	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
629 	    /*
630 	     * default case, just copy !
631 	     */
632 	    *out++ = *cur;
633 	} else if (*cur >= 0x80) {
634 	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
635 		/*
636 		 * Bjørn Reese <br@sseusa.com> provided the patch
637 	        xmlChar xc;
638 	        xc = (*cur & 0x3F) << 6;
639 	        if (cur[1] != 0) {
640 		    xc += *(++cur) & 0x3F;
641 		    *out++ = xc;
642 	        } else
643 		 */
644 		*out++ = *cur;
645 	    } else {
646 		/*
647 		 * We assume we have UTF-8 input.
648 		 * It must match either:
649 		 *   110xxxxx 10xxxxxx
650 		 *   1110xxxx 10xxxxxx 10xxxxxx
651 		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
652 		 * That is:
653 		 *   cur[0] is 11xxxxxx
654 		 *   cur[1] is 10xxxxxx
655 		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
656 		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
657 		 *   cur[0] is not 11111xxx
658 		 */
659 		char buf[13], *ptr;
660 		int val, l;
661 
662                 l = 4;
663                 val = xmlGetUTF8Char(cur, &l);
664                 if (val < 0) {
665                     val = 0xFFFD;
666                     cur++;
667                 } else {
668                     if (!IS_CHAR(val))
669                         val = 0xFFFD;
670                     cur += l;
671 		}
672 		/*
673 		 * We could do multiple things here. Just save as a char ref
674 		 */
675 		snprintf(buf, sizeof(buf), "&#x%X;", val);
676 		buf[sizeof(buf) - 1] = 0;
677 		ptr = buf;
678 		while (*ptr != 0) *out++ = *ptr++;
679 		continue;
680 	    }
681 	} else if (IS_BYTE_CHAR(*cur)) {
682 	    char buf[11], *ptr;
683 
684 	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
685 	    buf[sizeof(buf) - 1] = 0;
686             ptr = buf;
687 	    while (*ptr != 0) *out++ = *ptr++;
688 	}
689 	cur++;
690     }
691     *out = 0;
692     return(buffer);
693 
694 mem_error:
695     xmlFree(buffer);
696     return(NULL);
697 }
698 
699 /**
700  * xmlEncodeAttributeEntities:
701  * @doc:  the document containing the string
702  * @input:  A string to convert to XML.
703  *
704  * Do a global encoding of a string, replacing the predefined entities
705  * and non ASCII values with their entities and CharRef counterparts for
706  * attribute values.
707  *
708  * Returns A newly allocated string with the substitution done.
709  */
710 xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc,const xmlChar * input)711 xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
712     return xmlEncodeEntitiesInternal(doc, input, 1);
713 }
714 
715 /**
716  * xmlEncodeEntitiesReentrant:
717  * @doc:  the document containing the string
718  * @input:  A string to convert to XML.
719  *
720  * Do a global encoding of a string, replacing the predefined entities
721  * and non ASCII values with their entities and CharRef counterparts.
722  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
723  * must be deallocated.
724  *
725  * Returns A newly allocated string with the substitution done.
726  */
727 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)728 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
729     return xmlEncodeEntitiesInternal(doc, input, 0);
730 }
731 
732 /**
733  * xmlEncodeSpecialChars:
734  * @doc:  the document containing the string
735  * @input:  A string to convert to XML.
736  *
737  * Do a global encoding of a string, replacing the predefined entities
738  * this routine is reentrant, and result must be deallocated.
739  *
740  * Returns A newly allocated string with the substitution done.
741  */
742 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)743 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
744     const xmlChar *cur = input;
745     xmlChar *buffer = NULL;
746     xmlChar *out = NULL;
747     size_t buffer_size = 0;
748     if (input == NULL) return(NULL);
749 
750     /*
751      * allocate an translation buffer.
752      */
753     buffer_size = 1000;
754     buffer = (xmlChar *) xmlMalloc(buffer_size);
755     if (buffer == NULL)
756 	return(NULL);
757     out = buffer;
758 
759     while (*cur != '\0') {
760         size_t indx = out - buffer;
761         if (indx + 10 > buffer_size) {
762 
763 	    growBufferReentrant();
764 	    out = &buffer[indx];
765 	}
766 
767 	/*
768 	 * By default one have to encode at least '<', '>', '"' and '&' !
769 	 */
770 	if (*cur == '<') {
771 	    *out++ = '&';
772 	    *out++ = 'l';
773 	    *out++ = 't';
774 	    *out++ = ';';
775 	} else if (*cur == '>') {
776 	    *out++ = '&';
777 	    *out++ = 'g';
778 	    *out++ = 't';
779 	    *out++ = ';';
780 	} else if (*cur == '&') {
781 	    *out++ = '&';
782 	    *out++ = 'a';
783 	    *out++ = 'm';
784 	    *out++ = 'p';
785 	    *out++ = ';';
786 	} else if (*cur == '"') {
787 	    *out++ = '&';
788 	    *out++ = 'q';
789 	    *out++ = 'u';
790 	    *out++ = 'o';
791 	    *out++ = 't';
792 	    *out++ = ';';
793 	} else if (*cur == '\r') {
794 	    *out++ = '&';
795 	    *out++ = '#';
796 	    *out++ = '1';
797 	    *out++ = '3';
798 	    *out++ = ';';
799 	} else {
800 	    /*
801 	     * Works because on UTF-8, all extended sequences cannot
802 	     * result in bytes in the ASCII range.
803 	     */
804 	    *out++ = *cur;
805 	}
806 	cur++;
807     }
808     *out = 0;
809     return(buffer);
810 
811 mem_error:
812     xmlFree(buffer);
813     return(NULL);
814 }
815 
816 /**
817  * xmlCreateEntitiesTable:
818  *
819  * create and initialize an empty entities hash table.
820  * This really doesn't make sense and should be deprecated
821  *
822  * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
823  */
824 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)825 xmlCreateEntitiesTable(void) {
826     return((xmlEntitiesTablePtr) xmlHashCreate(0));
827 }
828 
829 /**
830  * xmlFreeEntityWrapper:
831  * @entity:  An entity
832  * @name:  its name
833  *
834  * Deallocate the memory used by an entities in the hash table.
835  */
836 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)837 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
838     if (entity != NULL)
839 	xmlFreeEntity((xmlEntityPtr) entity);
840 }
841 
842 /**
843  * xmlFreeEntitiesTable:
844  * @table:  An entity table
845  *
846  * Deallocate the memory used by an entities hash table.
847  */
848 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)849 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
850     xmlHashFree(table, xmlFreeEntityWrapper);
851 }
852 
853 #ifdef LIBXML_TREE_ENABLED
854 /**
855  * xmlCopyEntity:
856  * @ent:  An entity
857  *
858  * Build a copy of an entity
859  *
860  * Returns the new xmlEntitiesPtr or NULL in case of error.
861  */
862 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)863 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
864     xmlEntityPtr ent = (xmlEntityPtr) payload;
865     xmlEntityPtr cur;
866 
867     cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
868     if (cur == NULL)
869 	return(NULL);
870     memset(cur, 0, sizeof(xmlEntity));
871     cur->type = XML_ENTITY_DECL;
872 
873     cur->etype = ent->etype;
874     if (ent->name != NULL) {
875 	cur->name = xmlStrdup(ent->name);
876         if (cur->name == NULL)
877             goto error;
878     }
879     if (ent->ExternalID != NULL) {
880 	cur->ExternalID = xmlStrdup(ent->ExternalID);
881         if (cur->ExternalID == NULL)
882             goto error;
883     }
884     if (ent->SystemID != NULL) {
885 	cur->SystemID = xmlStrdup(ent->SystemID);
886         if (cur->SystemID == NULL)
887             goto error;
888     }
889     if (ent->content != NULL) {
890 	cur->content = xmlStrdup(ent->content);
891         if (cur->content == NULL)
892             goto error;
893     }
894     if (ent->orig != NULL) {
895 	cur->orig = xmlStrdup(ent->orig);
896         if (cur->orig == NULL)
897             goto error;
898     }
899     if (ent->URI != NULL) {
900 	cur->URI = xmlStrdup(ent->URI);
901         if (cur->URI == NULL)
902             goto error;
903     }
904     return(cur);
905 
906 error:
907     xmlFreeEntity(cur);
908     return(NULL);
909 }
910 
911 /**
912  * xmlCopyEntitiesTable:
913  * @table:  An entity table
914  *
915  * Build a copy of an entity table.
916  *
917  * Returns the new xmlEntitiesTablePtr or NULL in case of error.
918  */
919 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)920 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
921     return(xmlHashCopySafe(table, xmlCopyEntity, xmlFreeEntityWrapper));
922 }
923 #endif /* LIBXML_TREE_ENABLED */
924 
925 #ifdef LIBXML_OUTPUT_ENABLED
926 
927 /**
928  * xmlDumpEntityDecl:
929  * @buf:  An XML buffer.
930  * @ent:  An entity table
931  *
932  * This will dump the content of the entity table as an XML DTD definition
933  */
934 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)935 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
936     xmlSaveCtxtPtr save;
937 
938     if ((buf == NULL) || (ent == NULL))
939         return;
940 
941     save = xmlSaveToBuffer(buf, NULL, 0);
942     xmlSaveTree(save, (xmlNodePtr) ent);
943     if (xmlSaveFinish(save) != XML_ERR_OK)
944         xmlFree(xmlBufferDetach(buf));
945 }
946 
947 /**
948  * xmlDumpEntityDeclScan:
949  * @ent:  An entity table
950  * @buf:  An XML buffer.
951  *
952  * When using the hash table scan function, arguments need to be reversed
953  */
954 static void
xmlDumpEntityDeclScan(void * ent,void * save,const xmlChar * name ATTRIBUTE_UNUSED)955 xmlDumpEntityDeclScan(void *ent, void *save,
956                       const xmlChar *name ATTRIBUTE_UNUSED) {
957     xmlSaveTree(save, ent);
958 }
959 
960 /**
961  * xmlDumpEntitiesTable:
962  * @buf:  An XML buffer.
963  * @table:  An entity table
964  *
965  * This will dump the content of the entity table as an XML DTD definition
966  */
967 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)968 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
969     xmlSaveCtxtPtr save;
970 
971     if ((buf == NULL) || (table == NULL))
972         return;
973 
974     save = xmlSaveToBuffer(buf, NULL, 0);
975     xmlHashScan(table, xmlDumpEntityDeclScan, save);
976     if (xmlSaveFinish(save) != XML_ERR_OK)
977         xmlFree(xmlBufferDetach(buf));
978 }
979 #endif /* LIBXML_OUTPUT_ENABLED */
980