1 /*
2 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #include <string.h>
18 #ifdef HAVE_STDLIB_H
19 #include <stdlib.h>
20 #endif
21 #include <libxml/xmlmemory.h>
22 #include <libxml/hash.h>
23 #include <libxml/entities.h>
24 #include <libxml/parser.h>
25 #include <libxml/parserInternals.h>
26 #include <libxml/xmlerror.h>
27 #include <libxml/globals.h>
28 #include <libxml/dict.h>
29
30 #include "save.h"
31
/*
 * The XML predefined entities: lt, gt, amp, quot and apos.
 *
 * Each record is a statically initialized xmlEntity whose entity type is
 * XML_INTERNAL_PREDEFINED_ENTITY; xmlGetPredefinedEntity() returns pointers
 * to these records. The initializers are positional, so they must stay in
 * the same order as the fields of the xmlEntity declaration.
 *
 * NOTE(review): the two strings following the run of NULLs are presumably
 * the orig and content fields, and the following 1 the content length -
 * confirm against the xmlEntity declaration.
 */

/* "lt": the less-than sign */
static xmlEntity xmlEntityLt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "lt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "<", BAD_CAST "<", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
/* "gt": the greater-than sign */
static xmlEntity xmlEntityGt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "gt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST ">", BAD_CAST ">", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
/* "amp": the ampersand */
static xmlEntity xmlEntityAmp = {
    NULL, XML_ENTITY_DECL, BAD_CAST "amp",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "&", BAD_CAST "&", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
/* "quot": the double quote */
static xmlEntity xmlEntityQuot = {
    NULL, XML_ENTITY_DECL, BAD_CAST "quot",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "\"", BAD_CAST "\"", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
/* "apos": the single quote (apostrophe) */
static xmlEntity xmlEntityApos = {
    NULL, XML_ENTITY_DECL, BAD_CAST "apos",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "'", BAD_CAST "'", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
71
/**
 * xmlEntitiesErrMemory:
 * @extra:  extra information, forwarded unchanged to the error layer
 *
 * Handle an out of memory condition: reports XML_ERR_NO_MEMORY in the
 * XML_FROM_TREE domain through __xmlSimpleError(), with no node and no
 * message format.
 */
static void
xmlEntitiesErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
83
/**
 * xmlEntitiesErr:
 * @code:  the error code
 * @msg:  the message
 *
 * Raise an error: reports @code in the XML_FROM_TREE domain through
 * __xmlSimpleError(), with no node and no extra string.
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code, const char *msg)
{
    __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
}
96
/**
 * xmlEntitiesWarn:
 * @code:  the error code
 * @msg:  the message, used as the format string
 * @str1:  string argument for @msg
 *
 * Raise a warning: calls __xmlRaiseError() with domain XML_FROM_TREE and
 * level XML_ERR_WARNING. @str1 is passed twice, once as the first string
 * slot of the error and once as the argument consumed by @msg.
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1)
{
    __xmlRaiseError(NULL, NULL, NULL,
                NULL, NULL, XML_FROM_TREE, code,
                XML_ERR_WARNING, NULL, 0,
                (const char *)str1, NULL, NULL, 0, 0,
                msg, (const char *)str1, NULL);
}
113
114 /*
115 * xmlFreeEntity : clean-up an entity record.
116 */
117 static void
xmlFreeEntity(xmlEntityPtr entity)118 xmlFreeEntity(xmlEntityPtr entity)
119 {
120 xmlDictPtr dict = NULL;
121
122 if (entity == NULL)
123 return;
124
125 if (entity->doc != NULL)
126 dict = entity->doc->dict;
127
128
129 if ((entity->children) && (entity->owner == 1) &&
130 (entity == (xmlEntityPtr) entity->children->parent))
131 xmlFreeNodeList(entity->children);
132 if (dict != NULL) {
133 if ((entity->name != NULL) && (!xmlDictOwns(dict, entity->name)))
134 xmlFree((char *) entity->name);
135 if ((entity->ExternalID != NULL) &&
136 (!xmlDictOwns(dict, entity->ExternalID)))
137 xmlFree((char *) entity->ExternalID);
138 if ((entity->SystemID != NULL) &&
139 (!xmlDictOwns(dict, entity->SystemID)))
140 xmlFree((char *) entity->SystemID);
141 if ((entity->URI != NULL) && (!xmlDictOwns(dict, entity->URI)))
142 xmlFree((char *) entity->URI);
143 if ((entity->content != NULL)
144 && (!xmlDictOwns(dict, entity->content)))
145 xmlFree((char *) entity->content);
146 if ((entity->orig != NULL) && (!xmlDictOwns(dict, entity->orig)))
147 xmlFree((char *) entity->orig);
148 } else {
149 if (entity->name != NULL)
150 xmlFree((char *) entity->name);
151 if (entity->ExternalID != NULL)
152 xmlFree((char *) entity->ExternalID);
153 if (entity->SystemID != NULL)
154 xmlFree((char *) entity->SystemID);
155 if (entity->URI != NULL)
156 xmlFree((char *) entity->URI);
157 if (entity->content != NULL)
158 xmlFree((char *) entity->content);
159 if (entity->orig != NULL)
160 xmlFree((char *) entity->orig);
161 }
162 xmlFree(entity);
163 }
164
165 /*
166 * xmlCreateEntity:
167 *
168 * internal routine doing the entity node structures allocations
169 */
170 static xmlEntityPtr
xmlCreateEntity(xmlDictPtr dict,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)171 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
172 const xmlChar *ExternalID, const xmlChar *SystemID,
173 const xmlChar *content) {
174 xmlEntityPtr ret;
175
176 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
177 if (ret == NULL) {
178 xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
179 return(NULL);
180 }
181 memset(ret, 0, sizeof(xmlEntity));
182 ret->type = XML_ENTITY_DECL;
183 ret->checked = 0;
184
185 /*
186 * fill the structure.
187 */
188 ret->etype = (xmlEntityType) type;
189 if (dict == NULL) {
190 ret->name = xmlStrdup(name);
191 if (ExternalID != NULL)
192 ret->ExternalID = xmlStrdup(ExternalID);
193 if (SystemID != NULL)
194 ret->SystemID = xmlStrdup(SystemID);
195 } else {
196 ret->name = xmlDictLookup(dict, name, -1);
197 if (ExternalID != NULL)
198 ret->ExternalID = xmlDictLookup(dict, ExternalID, -1);
199 if (SystemID != NULL)
200 ret->SystemID = xmlDictLookup(dict, SystemID, -1);
201 }
202 if (content != NULL) {
203 ret->length = xmlStrlen(content);
204 if ((dict != NULL) && (ret->length < 5))
205 ret->content = (xmlChar *)
206 xmlDictLookup(dict, content, ret->length);
207 else
208 ret->content = xmlStrndup(content, ret->length);
209 } else {
210 ret->length = 0;
211 ret->content = NULL;
212 }
213 ret->URI = NULL; /* to be computed by the layer knowing
214 the defining entity */
215 ret->orig = NULL;
216 ret->owner = 0;
217
218 return(ret);
219 }
220
221 /*
222 * xmlAddEntity : register a new entity for an entities table.
223 */
224 static xmlEntityPtr
xmlAddEntity(xmlDtdPtr dtd,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)225 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
226 const xmlChar *ExternalID, const xmlChar *SystemID,
227 const xmlChar *content) {
228 xmlDictPtr dict = NULL;
229 xmlEntitiesTablePtr table = NULL;
230 xmlEntityPtr ret, predef;
231
232 if (name == NULL)
233 return(NULL);
234 if (dtd == NULL)
235 return(NULL);
236 if (dtd->doc != NULL)
237 dict = dtd->doc->dict;
238
239 switch (type) {
240 case XML_INTERNAL_GENERAL_ENTITY:
241 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
242 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
243 predef = xmlGetPredefinedEntity(name);
244 if (predef != NULL) {
245 int valid = 0;
246
247 /* 4.6 Predefined Entities */
248 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
249 (content != NULL)) {
250 int c = predef->content[0];
251
252 if (((content[0] == c) && (content[1] == 0)) &&
253 ((c == '>') || (c == '\'') || (c == '"'))) {
254 valid = 1;
255 } else if ((content[0] == '&') && (content[1] == '#')) {
256 if (content[2] == 'x') {
257 xmlChar *hex = BAD_CAST "0123456789ABCDEF";
258 xmlChar ref[] = "00;";
259
260 ref[0] = hex[c / 16 % 16];
261 ref[1] = hex[c % 16];
262 if (xmlStrcasecmp(&content[3], ref) == 0)
263 valid = 1;
264 } else {
265 xmlChar ref[] = "00;";
266
267 ref[0] = '0' + c / 10 % 10;
268 ref[1] = '0' + c % 10;
269 if (xmlStrEqual(&content[2], ref))
270 valid = 1;
271 }
272 }
273 }
274 if (!valid) {
275 xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING,
276 "xmlAddEntity: invalid redeclaration of predefined"
277 " entity '%s'", name);
278 return(NULL);
279 }
280 }
281 if (dtd->entities == NULL)
282 dtd->entities = xmlHashCreateDict(0, dict);
283 table = dtd->entities;
284 break;
285 case XML_INTERNAL_PARAMETER_ENTITY:
286 case XML_EXTERNAL_PARAMETER_ENTITY:
287 if (dtd->pentities == NULL)
288 dtd->pentities = xmlHashCreateDict(0, dict);
289 table = dtd->pentities;
290 break;
291 case XML_INTERNAL_PREDEFINED_ENTITY:
292 return(NULL);
293 }
294 if (table == NULL)
295 return(NULL);
296 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
297 if (ret == NULL)
298 return(NULL);
299 ret->doc = dtd->doc;
300
301 if (xmlHashAddEntry(table, name, ret)) {
302 /*
303 * entity was already defined at another level.
304 */
305 xmlFreeEntity(ret);
306 return(NULL);
307 }
308 return(ret);
309 }
310
311 /**
312 * xmlGetPredefinedEntity:
313 * @name: the entity name
314 *
315 * Check whether this name is an predefined entity.
316 *
317 * Returns NULL if not, otherwise the entity
318 */
319 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)320 xmlGetPredefinedEntity(const xmlChar *name) {
321 if (name == NULL) return(NULL);
322 switch (name[0]) {
323 case 'l':
324 if (xmlStrEqual(name, BAD_CAST "lt"))
325 return(&xmlEntityLt);
326 break;
327 case 'g':
328 if (xmlStrEqual(name, BAD_CAST "gt"))
329 return(&xmlEntityGt);
330 break;
331 case 'a':
332 if (xmlStrEqual(name, BAD_CAST "amp"))
333 return(&xmlEntityAmp);
334 if (xmlStrEqual(name, BAD_CAST "apos"))
335 return(&xmlEntityApos);
336 break;
337 case 'q':
338 if (xmlStrEqual(name, BAD_CAST "quot"))
339 return(&xmlEntityQuot);
340 break;
341 default:
342 break;
343 }
344 return(NULL);
345 }
346
347 /**
348 * xmlAddDtdEntity:
349 * @doc: the document
350 * @name: the entity name
351 * @type: the entity type XML_xxx_yyy_ENTITY
352 * @ExternalID: the entity external ID if available
353 * @SystemID: the entity system ID if available
354 * @content: the entity content
355 *
356 * Register a new entity for this document DTD external subset.
357 *
358 * Returns a pointer to the entity or NULL in case of error
359 */
360 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)361 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
362 const xmlChar *ExternalID, const xmlChar *SystemID,
363 const xmlChar *content) {
364 xmlEntityPtr ret;
365 xmlDtdPtr dtd;
366
367 if (doc == NULL) {
368 xmlEntitiesErr(XML_DTD_NO_DOC,
369 "xmlAddDtdEntity: document is NULL");
370 return(NULL);
371 }
372 if (doc->extSubset == NULL) {
373 xmlEntitiesErr(XML_DTD_NO_DTD,
374 "xmlAddDtdEntity: document without external subset");
375 return(NULL);
376 }
377 dtd = doc->extSubset;
378 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
379 if (ret == NULL) return(NULL);
380
381 /*
382 * Link it to the DTD
383 */
384 ret->parent = dtd;
385 ret->doc = dtd->doc;
386 if (dtd->last == NULL) {
387 dtd->children = dtd->last = (xmlNodePtr) ret;
388 } else {
389 dtd->last->next = (xmlNodePtr) ret;
390 ret->prev = dtd->last;
391 dtd->last = (xmlNodePtr) ret;
392 }
393 return(ret);
394 }
395
396 /**
397 * xmlAddDocEntity:
398 * @doc: the document
399 * @name: the entity name
400 * @type: the entity type XML_xxx_yyy_ENTITY
401 * @ExternalID: the entity external ID if available
402 * @SystemID: the entity system ID if available
403 * @content: the entity content
404 *
405 * Register a new entity for this document.
406 *
407 * Returns a pointer to the entity or NULL in case of error
408 */
409 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)410 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
411 const xmlChar *ExternalID, const xmlChar *SystemID,
412 const xmlChar *content) {
413 xmlEntityPtr ret;
414 xmlDtdPtr dtd;
415
416 if (doc == NULL) {
417 xmlEntitiesErr(XML_DTD_NO_DOC,
418 "xmlAddDocEntity: document is NULL");
419 return(NULL);
420 }
421 if (doc->intSubset == NULL) {
422 xmlEntitiesErr(XML_DTD_NO_DTD,
423 "xmlAddDocEntity: document without internal subset");
424 return(NULL);
425 }
426 dtd = doc->intSubset;
427 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
428 if (ret == NULL) return(NULL);
429
430 /*
431 * Link it to the DTD
432 */
433 ret->parent = dtd;
434 ret->doc = dtd->doc;
435 if (dtd->last == NULL) {
436 dtd->children = dtd->last = (xmlNodePtr) ret;
437 } else {
438 dtd->last->next = (xmlNodePtr) ret;
439 ret->prev = dtd->last;
440 dtd->last = (xmlNodePtr) ret;
441 }
442 return(ret);
443 }
444
445 /**
446 * xmlNewEntity:
447 * @doc: the document
448 * @name: the entity name
449 * @type: the entity type XML_xxx_yyy_ENTITY
450 * @ExternalID: the entity external ID if available
451 * @SystemID: the entity system ID if available
452 * @content: the entity content
453 *
454 * Create a new entity, this differs from xmlAddDocEntity() that if
455 * the document is NULL or has no internal subset defined, then an
456 * unlinked entity structure will be returned, it is then the responsibility
457 * of the caller to link it to the document later or free it when not needed
458 * anymore.
459 *
460 * Returns a pointer to the entity or NULL in case of error
461 */
462 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)463 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
464 const xmlChar *ExternalID, const xmlChar *SystemID,
465 const xmlChar *content) {
466 xmlEntityPtr ret;
467 xmlDictPtr dict;
468
469 if ((doc != NULL) && (doc->intSubset != NULL)) {
470 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
471 }
472 if (doc != NULL)
473 dict = doc->dict;
474 else
475 dict = NULL;
476 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
477 if (ret == NULL)
478 return(NULL);
479 ret->doc = doc;
480 return(ret);
481 }
482
/**
 * xmlGetEntityFromTable:
 * @table:  an entity table
 * @name:  the entity name
 *
 * Do an entity lookup in the table; this is xmlHashLookup() with the
 * result cast to an entity pointer. The same helper serves both the
 * general and the parameter entity tables.
 *
 * Returns A pointer to the entity structure or NULL if not found.
 */
static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
    return((xmlEntityPtr) xmlHashLookup(table, name));
}
498
499 /**
500 * xmlGetParameterEntity:
501 * @doc: the document referencing the entity
502 * @name: the entity name
503 *
504 * Do an entity lookup in the internal and external subsets and
505 * returns the corresponding parameter entity, if found.
506 *
507 * Returns A pointer to the entity structure or NULL if not found.
508 */
509 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)510 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
511 xmlEntitiesTablePtr table;
512 xmlEntityPtr ret;
513
514 if (doc == NULL)
515 return(NULL);
516 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
517 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
518 ret = xmlGetEntityFromTable(table, name);
519 if (ret != NULL)
520 return(ret);
521 }
522 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
523 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
524 return(xmlGetEntityFromTable(table, name));
525 }
526 return(NULL);
527 }
528
529 /**
530 * xmlGetDtdEntity:
531 * @doc: the document referencing the entity
532 * @name: the entity name
533 *
534 * Do an entity lookup in the DTD entity hash table and
535 * returns the corresponding entity, if found.
536 * Note: the first argument is the document node, not the DTD node.
537 *
538 * Returns A pointer to the entity structure or NULL if not found.
539 */
540 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)541 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
542 xmlEntitiesTablePtr table;
543
544 if (doc == NULL)
545 return(NULL);
546 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
547 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
548 return(xmlGetEntityFromTable(table, name));
549 }
550 return(NULL);
551 }
552
553 /**
554 * xmlGetDocEntity:
555 * @doc: the document referencing the entity
556 * @name: the entity name
557 *
558 * Do an entity lookup in the document entity hash table and
559 * returns the corresponding entity, otherwise a lookup is done
560 * in the predefined entities too.
561 *
562 * Returns A pointer to the entity structure or NULL if not found.
563 */
564 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)565 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
566 xmlEntityPtr cur;
567 xmlEntitiesTablePtr table;
568
569 if (doc != NULL) {
570 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
571 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
572 cur = xmlGetEntityFromTable(table, name);
573 if (cur != NULL)
574 return(cur);
575 }
576 if (doc->standalone != 1) {
577 if ((doc->extSubset != NULL) &&
578 (doc->extSubset->entities != NULL)) {
579 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
580 cur = xmlGetEntityFromTable(table, name);
581 if (cur != NULL)
582 return(cur);
583 }
584 }
585 }
586 return(xmlGetPredefinedEntity(name));
587 }
588
/*
 * Macro used to grow the current buffer: doubles its size.
 *
 * The expansion site must provide the variables `buffer` (xmlChar *) and
 * `buffer_size` (size_t) and a `mem_error` label, which is jumped to when
 * the doubled size wraps around or the reallocation fails. Pointers into
 * the old buffer must be recomputed by the caller afterwards.
 */
#define growBufferReentrant() do {                                      \
    size_t grown_size = buffer_size * 2;                                \
    xmlChar *grown;                                                     \
    if (grown_size < buffer_size) goto mem_error;                       \
    grown = (xmlChar *) xmlRealloc(buffer, grown_size);                 \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
    buffer_size = grown_size;                                           \
} while (0)
601
/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 * @attr:  are we handling an attribute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' are replaced by "&lt;", "&gt;" and "&amp;". For HTML
 * documents and attribute values, "<!-- ... -->" and "&{ ... }" spans are
 * copied unchanged. Bytes >= 0x80 are copied unchanged when the document
 * has an encoding or is HTML; otherwise they are decoded as UTF-8 and
 * written as hexadecimal character references. If that decoding fails and
 * @doc is not NULL, doc->encoding is set to "ISO-8859-1" as a side effect.
 *
 * Returns A newly allocated string with the substitution done, or NULL if
 * @input is NULL or memory could not be allocated.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 free bytes ahead of the write position. The
         * buffer may move when it grows, so out is rebuilt from its index.
         */
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one has to encode at least '<', '>', '"' and '&' !
         * Note: there is no branch for '"' in this function; it is copied
         * unchanged by the default (printable ASCII) case below.
         */
        if (*cur == '<') {
            const xmlChar *end;

            /*
             * Special handling of server side include in HTML attributes:
             * everything up to and including the closing "-->" is copied
             * verbatim.
             */
            if (html && attr &&
                (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
                ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
                while (cur != end) {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* the three bytes of "-->" */
                *out++ = *cur++;
                *out++ = *cur++;
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            /*
             * Special handling of &{...} construct from HTML 4, see
             * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
             * The span up to and including '}' is copied verbatim.
             */
            if (html && attr && (cur[1] == '{') &&
                (strchr((const char *) cur, '}'))) {
                while (*cur != '}') {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
            (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * Bjørn Reese <br@sseusa.com> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 *
                 * The patch above is disabled: the byte is copied as is.
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 * It must match either:
                 *   110xxxxx 10xxxxxx
                 *   1110xxxx 10xxxxxx 10xxxxxx
                 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                 * That is:
                 *   cur[0] is 11xxxxxx
                 *   cur[1] is 10xxxxxx
                 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
                 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
                 *   cur[0] is not 11111xxx
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;

                /*
                 * The tests short-circuit from left to right, so a later
                 * byte is only read once the earlier ones were found to be
                 * continuation bytes (hence not the terminating 0).
                 */
                if (((cur[0] & 0xC0) != 0xC0) ||
                    ((cur[1] & 0xC0) != 0x80) ||
                    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF8) == 0xF8))) {
                    /*
                     * Not UTF-8: mark the document as ISO-8859-1 and write
                     * this single byte as a decimal character reference.
                     */
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /* two byte sequence */
                    val = (cur[0]) & 0x1F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    l = 2;
                } else if (*cur < 0xF0) {
                    /* three byte sequence */
                    val = (cur[0]) & 0x0F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    l = 3;
                } else if (*cur < 0xF8) {
                    /* four byte sequence */
                    val = (cur[0]) & 0x07;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    val <<= 6;
                    val |= (cur[3]) & 0x3F;
                    l = 4;
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    /* decoded value rejected by IS_CHAR: same fallback */
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntities: char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            /*
             * Remaining bytes accepted by IS_BYTE_CHAR are written as
             * decimal character references. A byte matching no branch at
             * all produces no output and is skipped.
             */
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out = 0;
    return(buffer);

/* reached from growBufferReentrant() when the buffer cannot grow */
mem_error:
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}
815
/**
 * xmlEncodeAttributeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts for
 * attribute values. This is xmlEncodeEntitiesInternal() with the
 * attribute flag set to 1.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
    return xmlEncodeEntitiesInternal(doc, input, 1);
}
831
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated. This is xmlEncodeEntitiesInternal() with the
 * attribute flag set to 0.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    return xmlEncodeEntitiesInternal(doc, input, 0);
}
848
849 /**
850 * xmlEncodeSpecialChars:
851 * @doc: the document containing the string
852 * @input: A string to convert to XML.
853 *
854 * Do a global encoding of a string, replacing the predefined entities
855 * this routine is reentrant, and result must be deallocated.
856 *
857 * Returns A newly allocated string with the substitution done.
858 */
859 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)860 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
861 const xmlChar *cur = input;
862 xmlChar *buffer = NULL;
863 xmlChar *out = NULL;
864 size_t buffer_size = 0;
865 if (input == NULL) return(NULL);
866
867 /*
868 * allocate an translation buffer.
869 */
870 buffer_size = 1000;
871 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
872 if (buffer == NULL) {
873 xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
874 return(NULL);
875 }
876 out = buffer;
877
878 while (*cur != '\0') {
879 size_t indx = out - buffer;
880 if (indx + 10 > buffer_size) {
881
882 growBufferReentrant();
883 out = &buffer[indx];
884 }
885
886 /*
887 * By default one have to encode at least '<', '>', '"' and '&' !
888 */
889 if (*cur == '<') {
890 *out++ = '&';
891 *out++ = 'l';
892 *out++ = 't';
893 *out++ = ';';
894 } else if (*cur == '>') {
895 *out++ = '&';
896 *out++ = 'g';
897 *out++ = 't';
898 *out++ = ';';
899 } else if (*cur == '&') {
900 *out++ = '&';
901 *out++ = 'a';
902 *out++ = 'm';
903 *out++ = 'p';
904 *out++ = ';';
905 } else if (*cur == '"') {
906 *out++ = '&';
907 *out++ = 'q';
908 *out++ = 'u';
909 *out++ = 'o';
910 *out++ = 't';
911 *out++ = ';';
912 } else if (*cur == '\r') {
913 *out++ = '&';
914 *out++ = '#';
915 *out++ = '1';
916 *out++ = '3';
917 *out++ = ';';
918 } else {
919 /*
920 * Works because on UTF-8, all extended sequences cannot
921 * result in bytes in the ASCII range.
922 */
923 *out++ = *cur;
924 }
925 cur++;
926 }
927 *out = 0;
928 return(buffer);
929
930 mem_error:
931 xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
932 xmlFree(buffer);
933 return(NULL);
934 }
935
/**
 * xmlCreateEntitiesTable:
 *
 * create and initialize an empty entities hash table.
 * This really doesn't make sense and should be deprecated
 *
 * The table is made with xmlHashCreate(0), i.e. without the dictionary
 * that xmlAddEntity() passes to xmlHashCreateDict() for its own tables.
 *
 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCreateEntitiesTable(void) {
    return((xmlEntitiesTablePtr) xmlHashCreate(0));
}
948
/**
 * xmlFreeEntityWrapper:
 * @entity:  An entity
 * @name:  its name (unused)
 *
 * Deallocate the memory used by an entity stored in the hash table.
 * Passed to xmlHashFree() by xmlFreeEntitiesTable(); it forwards the
 * payload to xmlFreeEntity().
 */
static void
xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
    if (entity != NULL)
        xmlFreeEntity((xmlEntityPtr) entity);
}
961
/**
 * xmlFreeEntitiesTable:
 * @table:  An entity table
 *
 * Deallocate the memory used by an entities hash table: calls
 * xmlHashFree() with xmlFreeEntityWrapper() as the deallocator for the
 * stored entities.
 */
void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
    xmlHashFree(table, xmlFreeEntityWrapper);
}
972
973 #ifdef LIBXML_TREE_ENABLED
974 /**
975 * xmlCopyEntity:
976 * @ent: An entity
977 *
978 * Build a copy of an entity
979 *
980 * Returns the new xmlEntitiesPtr or NULL in case of error.
981 */
982 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)983 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
984 xmlEntityPtr ent = (xmlEntityPtr) payload;
985 xmlEntityPtr cur;
986
987 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
988 if (cur == NULL) {
989 xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
990 return(NULL);
991 }
992 memset(cur, 0, sizeof(xmlEntity));
993 cur->type = XML_ENTITY_DECL;
994
995 cur->etype = ent->etype;
996 if (ent->name != NULL)
997 cur->name = xmlStrdup(ent->name);
998 if (ent->ExternalID != NULL)
999 cur->ExternalID = xmlStrdup(ent->ExternalID);
1000 if (ent->SystemID != NULL)
1001 cur->SystemID = xmlStrdup(ent->SystemID);
1002 if (ent->content != NULL)
1003 cur->content = xmlStrdup(ent->content);
1004 if (ent->orig != NULL)
1005 cur->orig = xmlStrdup(ent->orig);
1006 if (ent->URI != NULL)
1007 cur->URI = xmlStrdup(ent->URI);
1008 return(cur);
1009 }
1010
/**
 * xmlCopyEntitiesTable:
 * @table:  An entity table
 *
 * Build a copy of an entity table: calls xmlHashCopy() with
 * xmlCopyEntity() as the function duplicating each stored entity.
 *
 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
    return(xmlHashCopy(table, xmlCopyEntity));
}
1023 #endif /* LIBXML_TREE_ENABLED */
1024
1025 #ifdef LIBXML_OUTPUT_ENABLED
1026
1027 /**
1028 * xmlDumpEntityContent:
1029 * @buf: An XML buffer.
1030 * @content: The entity content.
1031 *
1032 * This will dump the quoted string value, taking care of the special
1033 * treatment required by %
1034 */
1035 static void
xmlDumpEntityContent(xmlBufferPtr buf,const xmlChar * content)1036 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1037 if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
1038 if (xmlStrchr(content, '%')) {
1039 const xmlChar * base, *cur;
1040
1041 xmlBufferCCat(buf, "\"");
1042 base = cur = content;
1043 while (*cur != 0) {
1044 if (*cur == '"') {
1045 if (base != cur)
1046 xmlBufferAdd(buf, base, cur - base);
1047 xmlBufferAdd(buf, BAD_CAST """, 6);
1048 cur++;
1049 base = cur;
1050 } else if (*cur == '%') {
1051 if (base != cur)
1052 xmlBufferAdd(buf, base, cur - base);
1053 xmlBufferAdd(buf, BAD_CAST "%", 6);
1054 cur++;
1055 base = cur;
1056 } else {
1057 cur++;
1058 }
1059 }
1060 if (base != cur)
1061 xmlBufferAdd(buf, base, cur - base);
1062 xmlBufferCCat(buf, "\"");
1063 } else {
1064 xmlBufferWriteQuotedString(buf, content);
1065 }
1066 }
1067
1068 /**
1069 * xmlDumpEntityDecl:
1070 * @buf: An XML buffer.
1071 * @ent: An entity table
1072 *
1073 * This will dump the content of the entity table as an XML DTD definition
1074 */
1075 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)1076 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1077 if ((buf == NULL) || (ent == NULL)) return;
1078 switch (ent->etype) {
1079 case XML_INTERNAL_GENERAL_ENTITY:
1080 xmlBufferWriteChar(buf, "<!ENTITY ");
1081 xmlBufferWriteCHAR(buf, ent->name);
1082 xmlBufferWriteChar(buf, " ");
1083 if (ent->orig != NULL)
1084 xmlBufferWriteQuotedString(buf, ent->orig);
1085 else
1086 xmlDumpEntityContent(buf, ent->content);
1087 xmlBufferWriteChar(buf, ">\n");
1088 break;
1089 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1090 xmlBufferWriteChar(buf, "<!ENTITY ");
1091 xmlBufferWriteCHAR(buf, ent->name);
1092 if (ent->ExternalID != NULL) {
1093 xmlBufferWriteChar(buf, " PUBLIC ");
1094 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1095 xmlBufferWriteChar(buf, " ");
1096 xmlBufferWriteQuotedString(buf, ent->SystemID);
1097 } else {
1098 xmlBufferWriteChar(buf, " SYSTEM ");
1099 xmlBufferWriteQuotedString(buf, ent->SystemID);
1100 }
1101 xmlBufferWriteChar(buf, ">\n");
1102 break;
1103 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1104 xmlBufferWriteChar(buf, "<!ENTITY ");
1105 xmlBufferWriteCHAR(buf, ent->name);
1106 if (ent->ExternalID != NULL) {
1107 xmlBufferWriteChar(buf, " PUBLIC ");
1108 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1109 xmlBufferWriteChar(buf, " ");
1110 xmlBufferWriteQuotedString(buf, ent->SystemID);
1111 } else {
1112 xmlBufferWriteChar(buf, " SYSTEM ");
1113 xmlBufferWriteQuotedString(buf, ent->SystemID);
1114 }
1115 if (ent->content != NULL) { /* Should be true ! */
1116 xmlBufferWriteChar(buf, " NDATA ");
1117 if (ent->orig != NULL)
1118 xmlBufferWriteCHAR(buf, ent->orig);
1119 else
1120 xmlBufferWriteCHAR(buf, ent->content);
1121 }
1122 xmlBufferWriteChar(buf, ">\n");
1123 break;
1124 case XML_INTERNAL_PARAMETER_ENTITY:
1125 xmlBufferWriteChar(buf, "<!ENTITY % ");
1126 xmlBufferWriteCHAR(buf, ent->name);
1127 xmlBufferWriteChar(buf, " ");
1128 if (ent->orig == NULL)
1129 xmlDumpEntityContent(buf, ent->content);
1130 else
1131 xmlBufferWriteQuotedString(buf, ent->orig);
1132 xmlBufferWriteChar(buf, ">\n");
1133 break;
1134 case XML_EXTERNAL_PARAMETER_ENTITY:
1135 xmlBufferWriteChar(buf, "<!ENTITY % ");
1136 xmlBufferWriteCHAR(buf, ent->name);
1137 if (ent->ExternalID != NULL) {
1138 xmlBufferWriteChar(buf, " PUBLIC ");
1139 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1140 xmlBufferWriteChar(buf, " ");
1141 xmlBufferWriteQuotedString(buf, ent->SystemID);
1142 } else {
1143 xmlBufferWriteChar(buf, " SYSTEM ");
1144 xmlBufferWriteQuotedString(buf, ent->SystemID);
1145 }
1146 xmlBufferWriteChar(buf, ">\n");
1147 break;
1148 default:
1149 xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1150 "xmlDumpEntitiesDecl: internal: unknown type entity type");
1151 }
1152 }
1153
/**
 * xmlDumpEntityDeclScan:
 * @ent:  An entity (the hash table payload)
 * @buf:  An XML buffer (the user data given to the scan).
 * @name:  the hash key of the entity (unused)
 *
 * When using the hash table scan function, arguments need to be reversed:
 * this adapter forwards to xmlDumpEntityDecl(buf, ent).
 */
static void
xmlDumpEntityDeclScan(void *ent, void *buf,
                      const xmlChar *name ATTRIBUTE_UNUSED) {
    xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
}
1166
/**
 * xmlDumpEntitiesTable:
 * @buf:  An XML buffer.
 * @table:  An entity table
 *
 * This will dump the content of the entity table as an XML DTD definition:
 * xmlHashScan() visits the table and xmlDumpEntityDecl() is applied to
 * each entity through xmlDumpEntityDeclScan().
 */
void
xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
    xmlHashScan(table, xmlDumpEntityDeclScan, buf);
}
1178 #endif /* LIBXML_OUTPUT_ENABLED */
1179