1 /*
2 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #include <string.h>
18 #include <stdlib.h>
19
20 #include <libxml/xmlmemory.h>
21 #include <libxml/hash.h>
22 #include <libxml/entities.h>
23 #include <libxml/parser.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/xmlerror.h>
26 #include <libxml/dict.h>
27 #include <libxml/xmlsave.h>
28
29 #include "private/entities.h"
30 #include "private/error.h"
31 #include "private/parser.h"
32
33 #ifndef SIZE_MAX
34 #define SIZE_MAX ((size_t) -1)
35 #endif
36
/*
 * The XML predefined entities: lt, gt, amp, quot and apos.
 *
 * These records are statically allocated and are the objects returned by
 * xmlGetPredefinedEntity(). They use positional initializers, so the value
 * order has to follow the field order of the xmlEntity structure, which is
 * declared outside this file.
 *
 * xmlAddEntity() reads content[0] of these records to obtain the character
 * each entity stands for.
 * NOTE(review): the two string literals are presumably the orig/content
 * pair and the following 1 their length - confirm against the xmlEntity
 * declaration.
 */

static xmlEntity xmlEntityLt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "lt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "<", BAD_CAST "<", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 0, 0
};
static xmlEntity xmlEntityGt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "gt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST ">", BAD_CAST ">", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 0, 0
};
static xmlEntity xmlEntityAmp = {
    NULL, XML_ENTITY_DECL, BAD_CAST "amp",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "&", BAD_CAST "&", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 0, 0
};
static xmlEntity xmlEntityQuot = {
    NULL, XML_ENTITY_DECL, BAD_CAST "quot",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "\"", BAD_CAST "\"", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 0, 0
};
static xmlEntity xmlEntityApos = {
    NULL, XML_ENTITY_DECL, BAD_CAST "apos",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "'", BAD_CAST "'", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 0, 0
};
76
77 /*
78 * xmlFreeEntity:
79 * @entity: an entity
80 *
81 * Frees the entity.
82 */
83 void
xmlFreeEntity(xmlEntityPtr entity)84 xmlFreeEntity(xmlEntityPtr entity)
85 {
86 xmlDictPtr dict = NULL;
87
88 if (entity == NULL)
89 return;
90
91 if (entity->doc != NULL)
92 dict = entity->doc->dict;
93
94
95 if ((entity->children) &&
96 (entity == (xmlEntityPtr) entity->children->parent))
97 xmlFreeNodeList(entity->children);
98 if ((entity->name != NULL) &&
99 ((dict == NULL) || (!xmlDictOwns(dict, entity->name))))
100 xmlFree((char *) entity->name);
101 if (entity->ExternalID != NULL)
102 xmlFree((char *) entity->ExternalID);
103 if (entity->SystemID != NULL)
104 xmlFree((char *) entity->SystemID);
105 if (entity->URI != NULL)
106 xmlFree((char *) entity->URI);
107 if (entity->content != NULL)
108 xmlFree((char *) entity->content);
109 if (entity->orig != NULL)
110 xmlFree((char *) entity->orig);
111 xmlFree(entity);
112 }
113
114 /*
115 * xmlCreateEntity:
116 *
117 * internal routine doing the entity node structures allocations
118 */
119 static xmlEntityPtr
xmlCreateEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)120 xmlCreateEntity(xmlDocPtr doc, const xmlChar *name, int type,
121 const xmlChar *ExternalID, const xmlChar *SystemID,
122 const xmlChar *content) {
123 xmlEntityPtr ret;
124
125 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
126 if (ret == NULL)
127 return(NULL);
128 memset(ret, 0, sizeof(xmlEntity));
129 ret->doc = doc;
130 ret->type = XML_ENTITY_DECL;
131
132 /*
133 * fill the structure.
134 */
135 ret->etype = (xmlEntityType) type;
136 if ((doc == NULL) || (doc->dict == NULL))
137 ret->name = xmlStrdup(name);
138 else
139 ret->name = xmlDictLookup(doc->dict, name, -1);
140 if (ret->name == NULL)
141 goto error;
142 if (ExternalID != NULL) {
143 ret->ExternalID = xmlStrdup(ExternalID);
144 if (ret->ExternalID == NULL)
145 goto error;
146 }
147 if (SystemID != NULL) {
148 ret->SystemID = xmlStrdup(SystemID);
149 if (ret->SystemID == NULL)
150 goto error;
151 }
152 if (content != NULL) {
153 ret->length = xmlStrlen(content);
154 ret->content = xmlStrndup(content, ret->length);
155 if (ret->content == NULL)
156 goto error;
157 } else {
158 ret->length = 0;
159 ret->content = NULL;
160 }
161 ret->URI = NULL; /* to be computed by the layer knowing
162 the defining entity */
163 ret->orig = NULL;
164
165 return(ret);
166
167 error:
168 xmlFreeEntity(ret);
169 return(NULL);
170 }
171
172 /**
173 * xmlAddEntity:
174 * @doc: the document
175 * @extSubset: add to the external or internal subset
176 * @name: the entity name
177 * @type: the entity type XML_xxx_yyy_ENTITY
178 * @ExternalID: the entity external ID if available
179 * @SystemID: the entity system ID if available
180 * @content: the entity content
181 * @out: pointer to resulting entity (optional)
182 *
183 * Register a new entity for this document.
184 *
185 * Available since 2.13.0.
186 *
187 * Returns an xmlParserErrors error code.
188 */
189 int
xmlAddEntity(xmlDocPtr doc,int extSubset,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content,xmlEntityPtr * out)190 xmlAddEntity(xmlDocPtr doc, int extSubset, const xmlChar *name, int type,
191 const xmlChar *ExternalID, const xmlChar *SystemID,
192 const xmlChar *content, xmlEntityPtr *out) {
193 xmlDtdPtr dtd;
194 xmlDictPtr dict = NULL;
195 xmlEntitiesTablePtr table = NULL;
196 xmlEntityPtr ret, predef;
197 int res;
198
199 if (out != NULL)
200 *out = NULL;
201 if ((doc == NULL) || (name == NULL))
202 return(XML_ERR_ARGUMENT);
203 dict = doc->dict;
204
205 if (extSubset)
206 dtd = doc->extSubset;
207 else
208 dtd = doc->intSubset;
209 if (dtd == NULL)
210 return(XML_DTD_NO_DTD);
211
212 switch (type) {
213 case XML_INTERNAL_GENERAL_ENTITY:
214 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
215 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
216 predef = xmlGetPredefinedEntity(name);
217 if (predef != NULL) {
218 int valid = 0;
219
220 /* 4.6 Predefined Entities */
221 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
222 (content != NULL)) {
223 int c = predef->content[0];
224
225 if (((content[0] == c) && (content[1] == 0)) &&
226 ((c == '>') || (c == '\'') || (c == '"'))) {
227 valid = 1;
228 } else if ((content[0] == '&') && (content[1] == '#')) {
229 if (content[2] == 'x') {
230 xmlChar *hex = BAD_CAST "0123456789ABCDEF";
231 xmlChar ref[] = "00;";
232
233 ref[0] = hex[c / 16 % 16];
234 ref[1] = hex[c % 16];
235 if (xmlStrcasecmp(&content[3], ref) == 0)
236 valid = 1;
237 } else {
238 xmlChar ref[] = "00;";
239
240 ref[0] = '0' + c / 10 % 10;
241 ref[1] = '0' + c % 10;
242 if (xmlStrEqual(&content[2], ref))
243 valid = 1;
244 }
245 }
246 }
247 if (!valid)
248 return(XML_ERR_REDECL_PREDEF_ENTITY);
249 }
250 if (dtd->entities == NULL) {
251 dtd->entities = xmlHashCreateDict(0, dict);
252 if (dtd->entities == NULL)
253 return(XML_ERR_NO_MEMORY);
254 }
255 table = dtd->entities;
256 break;
257 case XML_INTERNAL_PARAMETER_ENTITY:
258 case XML_EXTERNAL_PARAMETER_ENTITY:
259 if (dtd->pentities == NULL) {
260 dtd->pentities = xmlHashCreateDict(0, dict);
261 if (dtd->pentities == NULL)
262 return(XML_ERR_NO_MEMORY);
263 }
264 table = dtd->pentities;
265 break;
266 default:
267 return(XML_ERR_ARGUMENT);
268 }
269 ret = xmlCreateEntity(dtd->doc, name, type, ExternalID, SystemID, content);
270 if (ret == NULL)
271 return(XML_ERR_NO_MEMORY);
272
273 res = xmlHashAdd(table, name, ret);
274 if (res < 0) {
275 xmlFreeEntity(ret);
276 return(XML_ERR_NO_MEMORY);
277 } else if (res == 0) {
278 /*
279 * entity was already defined at another level.
280 */
281 xmlFreeEntity(ret);
282 return(XML_WAR_ENTITY_REDEFINED);
283 }
284
285 /*
286 * Link it to the DTD
287 */
288 ret->parent = dtd;
289 ret->doc = dtd->doc;
290 if (dtd->last == NULL) {
291 dtd->children = dtd->last = (xmlNodePtr) ret;
292 } else {
293 dtd->last->next = (xmlNodePtr) ret;
294 ret->prev = dtd->last;
295 dtd->last = (xmlNodePtr) ret;
296 }
297
298 if (out != NULL)
299 *out = ret;
300 return(0);
301 }
302
303 /**
304 * xmlGetPredefinedEntity:
305 * @name: the entity name
306 *
307 * Check whether this name is an predefined entity.
308 *
309 * Returns NULL if not, otherwise the entity
310 */
311 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)312 xmlGetPredefinedEntity(const xmlChar *name) {
313 if (name == NULL) return(NULL);
314 switch (name[0]) {
315 case 'l':
316 if (xmlStrEqual(name, BAD_CAST "lt"))
317 return(&xmlEntityLt);
318 break;
319 case 'g':
320 if (xmlStrEqual(name, BAD_CAST "gt"))
321 return(&xmlEntityGt);
322 break;
323 case 'a':
324 if (xmlStrEqual(name, BAD_CAST "amp"))
325 return(&xmlEntityAmp);
326 if (xmlStrEqual(name, BAD_CAST "apos"))
327 return(&xmlEntityApos);
328 break;
329 case 'q':
330 if (xmlStrEqual(name, BAD_CAST "quot"))
331 return(&xmlEntityQuot);
332 break;
333 default:
334 break;
335 }
336 return(NULL);
337 }
338
339 /**
340 * xmlAddDtdEntity:
341 * @doc: the document
342 * @name: the entity name
343 * @type: the entity type XML_xxx_yyy_ENTITY
344 * @ExternalID: the entity external ID if available
345 * @SystemID: the entity system ID if available
346 * @content: the entity content
347 *
348 * Register a new entity for this document DTD external subset.
349 *
350 * Returns a pointer to the entity or NULL in case of error
351 */
352 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)353 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
354 const xmlChar *ExternalID, const xmlChar *SystemID,
355 const xmlChar *content) {
356 xmlEntityPtr ret;
357
358 xmlAddEntity(doc, 1, name, type, ExternalID, SystemID, content, &ret);
359 return(ret);
360 }
361
362 /**
363 * xmlAddDocEntity:
364 * @doc: the document
365 * @name: the entity name
366 * @type: the entity type XML_xxx_yyy_ENTITY
367 * @ExternalID: the entity external ID if available
368 * @SystemID: the entity system ID if available
369 * @content: the entity content
370 *
371 * Register a new entity for this document.
372 *
373 * Returns a pointer to the entity or NULL in case of error
374 */
375 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)376 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
377 const xmlChar *ExternalID, const xmlChar *SystemID,
378 const xmlChar *content) {
379 xmlEntityPtr ret;
380
381 xmlAddEntity(doc, 0, name, type, ExternalID, SystemID, content, &ret);
382 return(ret);
383 }
384
385 /**
386 * xmlNewEntity:
387 * @doc: the document
388 * @name: the entity name
389 * @type: the entity type XML_xxx_yyy_ENTITY
390 * @ExternalID: the entity external ID if available
391 * @SystemID: the entity system ID if available
392 * @content: the entity content
393 *
394 * Create a new entity, this differs from xmlAddDocEntity() that if
395 * the document is NULL or has no internal subset defined, then an
396 * unlinked entity structure will be returned, it is then the responsibility
397 * of the caller to link it to the document later or free it when not needed
398 * anymore.
399 *
400 * Returns a pointer to the entity or NULL in case of error
401 */
402 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)403 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
404 const xmlChar *ExternalID, const xmlChar *SystemID,
405 const xmlChar *content) {
406 if ((doc != NULL) && (doc->intSubset != NULL)) {
407 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
408 }
409 if (name == NULL)
410 return(NULL);
411 return(xmlCreateEntity(doc, name, type, ExternalID, SystemID, content));
412 }
413
414 /**
415 * xmlGetEntityFromTable:
416 * @table: an entity table
417 * @name: the entity name
418 * @parameter: look for parameter entities
419 *
420 * Do an entity lookup in the table.
421 * returns the corresponding parameter entity, if found.
422 *
423 * Returns A pointer to the entity structure or NULL if not found.
424 */
425 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)426 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
427 return((xmlEntityPtr) xmlHashLookup(table, name));
428 }
429
430 /**
431 * xmlGetParameterEntity:
432 * @doc: the document referencing the entity
433 * @name: the entity name
434 *
435 * Do an entity lookup in the internal and external subsets and
436 * returns the corresponding parameter entity, if found.
437 *
438 * Returns A pointer to the entity structure or NULL if not found.
439 */
440 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)441 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
442 xmlEntitiesTablePtr table;
443 xmlEntityPtr ret;
444
445 if (doc == NULL)
446 return(NULL);
447 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
448 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
449 ret = xmlGetEntityFromTable(table, name);
450 if (ret != NULL)
451 return(ret);
452 }
453 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
454 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
455 return(xmlGetEntityFromTable(table, name));
456 }
457 return(NULL);
458 }
459
460 /**
461 * xmlGetDtdEntity:
462 * @doc: the document referencing the entity
463 * @name: the entity name
464 *
465 * Do an entity lookup in the DTD entity hash table and
466 * returns the corresponding entity, if found.
467 * Note: the first argument is the document node, not the DTD node.
468 *
469 * Returns A pointer to the entity structure or NULL if not found.
470 */
471 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)472 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
473 xmlEntitiesTablePtr table;
474
475 if (doc == NULL)
476 return(NULL);
477 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
478 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
479 return(xmlGetEntityFromTable(table, name));
480 }
481 return(NULL);
482 }
483
484 /**
485 * xmlGetDocEntity:
486 * @doc: the document referencing the entity
487 * @name: the entity name
488 *
489 * Do an entity lookup in the document entity hash table and
490 * returns the corresponding entity, otherwise a lookup is done
491 * in the predefined entities too.
492 *
493 * Returns A pointer to the entity structure or NULL if not found.
494 */
495 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)496 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
497 xmlEntityPtr cur;
498 xmlEntitiesTablePtr table;
499
500 if (doc != NULL) {
501 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
502 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
503 cur = xmlGetEntityFromTable(table, name);
504 if (cur != NULL)
505 return(cur);
506 }
507 if (doc->standalone != 1) {
508 if ((doc->extSubset != NULL) &&
509 (doc->extSubset->entities != NULL)) {
510 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
511 cur = xmlGetEntityFromTable(table, name);
512 if (cur != NULL)
513 return(cur);
514 }
515 }
516 }
517 return(xmlGetPredefinedEntity(name));
518 }
519
/*
 * xmlSerializeHexCharRef:
 * @buf:  a char buffer
 * @val:  a codepoint
 *
 * Serializes a hex char ref like &#xA0;
 *
 * The number of digits is derived from the low 24 bits of @val: one to
 * six uppercase hex digits without leading zeros (a single 0 for zero).
 *
 * Writes at most 10 bytes. Does not include a terminating zero byte.
 *
 * Returns the number of bytes written.
 */
int
xmlSerializeHexCharRef(char *buf, int val) {
    static const char hexDigits[] = "0123456789ABCDEF";
    char *p = buf;
    int shift;      /* bit offset of the nibble being written */

    p[0] = '&';
    p[1] = '#';
    p[2] = 'x';
    p += 3;

    /* find the highest non-zero nibble: pick the byte, then its half */
    if (val & 0xFF0000)
        shift = (val & 0xF00000) ? 20 : 16;
    else if (val & 0x00FF00)
        shift = (val & 0x00F000) ? 12 : 8;
    else
        shift = (val & 0x0000F0) ? 4 : 0;

    for (; shift >= 0; shift -= 4)
        *p++ = hexDigits[(val >> shift) & 0x0F];

    *p++ = ';';

    return(p - buf);
}
567
/*
 * xmlSerializeDecCharRef:
 * @buf:  a char buffer
 * @val:  a codepoint
 *
 * Serializes a decimal char ref like &#38;
 *
 * Between one and seven decimal digits are written; seven digits are used
 * for every value of 1000000 and above.
 *
 * Writes at most 10 bytes. Does not include a terminating zero byte.
 *
 * Returns the number of bytes written.
 */
int
xmlSerializeDecCharRef(char *buf, int val) {
    char *digit;
    int ndigits = 1;
    int limit = 10;

    buf[0] = '&';
    buf[1] = '#';

    /* count the digits, capped at seven */
    while ((ndigits < 7) && (val >= limit)) {
        ndigits += 1;
        limit *= 10;
    }

    /* place the terminator first, then fill the digits right to left */
    digit = buf + 2 + ndigits;
    *digit = ';';
    while (digit > buf + 2) {
        digit -= 1;
        *digit = '0' + val % 10;
        val /= 10;
    }

    return(ndigits + 3);
}
606
607 static const char xmlEscapeSafe[128] = {
608 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
611 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
612 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
613 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
614 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
615 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
616 };
617
618 /*
619 * xmlEscapeText:
620 * @text: input text
621 * @flags: XML_ESCAPE flags
622 *
623 * Escapes certain characters with char refs.
624 *
625 * XML_ESCAPE_ATTR: for attribute content.
626 * XML_ESCAPE_NON_ASCII: escape non-ASCII chars.
627 * XML_ESCAPE_HTML: for HTML content.
628 * XML_ESCAPE_QUOT: escape double quotes.
629 * XML_ESCAPE_ALLOW_INVALID: allow invalid characters.
630 *
631 * Returns an escaped string or NULL if a memory allocation failed.
632 */
633 xmlChar *
xmlEscapeText(const xmlChar * text,int flags)634 xmlEscapeText(const xmlChar *text, int flags) {
635 const xmlChar *cur;
636 xmlChar *buffer;
637 xmlChar *out;
638 const xmlChar *unescaped;
639 size_t size = 50;
640
641 buffer = xmlMalloc(size + 1);
642 if (buffer == NULL)
643 return(NULL);
644 out = buffer;
645
646 cur = text;
647 unescaped = cur;
648
649 while (*cur != '\0') {
650 char buf[12];
651 const xmlChar *end;
652 const xmlChar *repl;
653 size_t used;
654 size_t replSize;
655 size_t unescapedSize;
656 size_t totalSize;
657 int chunkSize = 1;
658 int c;
659
660 /* accelerator */
661 while (1) {
662 c = *cur;
663
664 if (c < 0x80) {
665 if (!xmlEscapeSafe[*cur])
666 break;
667 } else {
668 if (flags & XML_ESCAPE_NON_ASCII)
669 break;
670 }
671 cur += 1;
672 }
673
674 if (c == 0) {
675 chunkSize = 0;
676 repl = BAD_CAST "";
677 replSize = 0;
678 } else if (c == '<') {
679 /*
680 * Special handling of server side include in HTML attributes
681 */
682 if ((flags & XML_ESCAPE_HTML) && (flags & XML_ESCAPE_ATTR) &&
683 (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
684 ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
685 chunkSize = (end - cur) + 3;
686 repl = cur;
687 replSize = chunkSize;
688 } else {
689 repl = BAD_CAST "<";
690 replSize = 4;
691 }
692 } else if (c == '>') {
693 repl = BAD_CAST ">";
694 replSize = 4;
695 } else if (c == '&') {
696 /*
697 * Special handling of &{...} construct from HTML 4, see
698 * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
699 */
700 if ((flags & XML_ESCAPE_HTML) && (flags & XML_ESCAPE_ATTR) &&
701 (cur[1] == '{') && (end = xmlStrchr(cur, '}'))) {
702 chunkSize = (end - cur) + 1;
703 repl = cur;
704 replSize = chunkSize;
705 } else {
706 repl = BAD_CAST "&";
707 replSize = 5;
708 }
709 } else if ((flags & XML_ESCAPE_QUOT) && (c == '"')) {
710 repl = BAD_CAST """;
711 replSize = 6;
712 } else if (((flags & XML_ESCAPE_HTML) == 0) && (c == '\r')) {
713 repl = BAD_CAST " ";
714 replSize = 5;
715 } else if ((flags & XML_ESCAPE_NON_ASCII) && (c >= 0x80)) {
716 int val;
717
718 chunkSize = 4;
719 val = xmlGetUTF8Char(cur, &chunkSize);
720 if (val < 0) {
721 val = 0xFFFD;
722 chunkSize = 1;
723 } else if (((flags & XML_ESCAPE_ALLOW_INVALID) == 0) &&
724 (!IS_CHAR(val))) {
725 val = 0xFFFD;
726 }
727
728 replSize = xmlSerializeHexCharRef(buf, val);
729 repl = BAD_CAST buf;
730 } else if ((flags & (XML_ESCAPE_ALLOW_INVALID | XML_ESCAPE_HTML)) ||
731 (c >= 0x20) ||
732 (c == '\n') || (c == '\t') || (c == '\r')) {
733 /* default case, just copy */
734 cur += 1;
735 if (*cur != 0)
736 continue;
737
738 chunkSize = 0;
739 repl = BAD_CAST "";
740 replSize = 0;
741 } else {
742 /* ignore */
743 repl = BAD_CAST "";
744 replSize = 0;
745 }
746
747 used = out - buffer;
748 unescapedSize = cur - unescaped;
749 totalSize = unescapedSize + replSize;
750
751 cur += chunkSize;
752
753 if (totalSize > size - used) {
754 xmlChar *tmp;
755 int newSize;
756
757 if ((size > (SIZE_MAX - 1) / 2) ||
758 (totalSize > (SIZE_MAX - 1) / 2 - size)) {
759 xmlFree(buffer);
760 return(NULL);
761 }
762 newSize = size + totalSize;
763 if (*cur != 0)
764 newSize *= 2;
765 tmp = xmlRealloc(buffer, newSize + 1);
766 if (tmp == NULL) {
767 xmlFree(buffer);
768 return(NULL);
769 }
770 buffer = tmp;
771 size = newSize;
772 out = buffer + used;
773 }
774
775 memcpy(out, unescaped, unescapedSize);
776 out += unescapedSize;
777 memcpy(out, repl, replSize);
778 out += replSize;
779
780 unescaped = cur;
781 }
782
783 *out = 0;
784 return(buffer);
785 }
786
787 /**
788 * xmlEncodeEntitiesInternal:
789 * @doc: the document containing the string
790 * @input: A string to convert to XML.
791 * @flags: XML_ESCAPE flags
792 *
793 * Do a global encoding of a string, replacing the predefined entities
794 * and non ASCII values with their entities and CharRef counterparts.
795 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
796 * must be deallocated.
797 *
798 * Returns A newly allocated string with the substitution done.
799 */
800 xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,unsigned flags)801 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input,
802 unsigned flags) {
803 if (input == NULL)
804 return(NULL);
805
806 if ((doc != NULL) && (doc->type == XML_HTML_DOCUMENT_NODE))
807 flags |= XML_ESCAPE_HTML;
808 else if ((doc == NULL) || (doc->encoding == NULL))
809 flags |= XML_ESCAPE_NON_ASCII;
810
811 return(xmlEscapeText(input, flags));
812 }
813
814 /**
815 * xmlEncodeEntitiesReentrant:
816 * @doc: the document containing the string
817 * @input: A string to convert to XML.
818 *
819 * Do a global encoding of a string, replacing the predefined entities
820 * and non ASCII values with their entities and CharRef counterparts.
821 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
822 * must be deallocated.
823 *
824 * This escapes '<', '>', '&' and '\r'. If the document has no encoding,
825 * non-ASCII codepoints are escaped. There is some special handling for
826 * HTML documents.
827 *
828 * Returns A newly allocated string with the substitution done.
829 */
830 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)831 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
832 return xmlEncodeEntitiesInternal(doc, input, 0);
833 }
834
835 /**
836 * xmlEncodeSpecialChars:
837 * @doc: unused
838 * @input: A string to convert to XML.
839 *
840 * Do a global encoding of a string, replacing the predefined entities
841 * this routine is reentrant, and result must be deallocated.
842 *
843 * This escapes '<', '>', '&', '"' and '\r' chars.
844 *
845 * Returns A newly allocated string with the substitution done.
846 */
847 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)848 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED,
849 const xmlChar *input) {
850 if (input == NULL)
851 return(NULL);
852
853 return(xmlEscapeText(input, XML_ESCAPE_QUOT | XML_ESCAPE_ALLOW_INVALID));
854 }
855
856 /**
857 * xmlCreateEntitiesTable:
858 *
859 * create and initialize an empty entities hash table.
860 * This really doesn't make sense and should be deprecated
861 *
862 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
863 */
864 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)865 xmlCreateEntitiesTable(void) {
866 return((xmlEntitiesTablePtr) xmlHashCreate(0));
867 }
868
869 /**
870 * xmlFreeEntityWrapper:
871 * @entity: An entity
872 * @name: its name
873 *
874 * Deallocate the memory used by an entities in the hash table.
875 */
876 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)877 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
878 if (entity != NULL)
879 xmlFreeEntity((xmlEntityPtr) entity);
880 }
881
882 /**
883 * xmlFreeEntitiesTable:
884 * @table: An entity table
885 *
886 * Deallocate the memory used by an entities hash table.
887 */
888 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)889 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
890 xmlHashFree(table, xmlFreeEntityWrapper);
891 }
892
893 /**
894 * xmlCopyEntity:
895 * @ent: An entity
896 *
897 * Build a copy of an entity
898 *
899 * Returns the new xmlEntitiesPtr or NULL in case of error.
900 */
901 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)902 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
903 xmlEntityPtr ent = (xmlEntityPtr) payload;
904 xmlEntityPtr cur;
905
906 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
907 if (cur == NULL)
908 return(NULL);
909 memset(cur, 0, sizeof(xmlEntity));
910 cur->type = XML_ENTITY_DECL;
911
912 cur->etype = ent->etype;
913 if (ent->name != NULL) {
914 cur->name = xmlStrdup(ent->name);
915 if (cur->name == NULL)
916 goto error;
917 }
918 if (ent->ExternalID != NULL) {
919 cur->ExternalID = xmlStrdup(ent->ExternalID);
920 if (cur->ExternalID == NULL)
921 goto error;
922 }
923 if (ent->SystemID != NULL) {
924 cur->SystemID = xmlStrdup(ent->SystemID);
925 if (cur->SystemID == NULL)
926 goto error;
927 }
928 if (ent->content != NULL) {
929 cur->content = xmlStrdup(ent->content);
930 if (cur->content == NULL)
931 goto error;
932 }
933 if (ent->orig != NULL) {
934 cur->orig = xmlStrdup(ent->orig);
935 if (cur->orig == NULL)
936 goto error;
937 }
938 if (ent->URI != NULL) {
939 cur->URI = xmlStrdup(ent->URI);
940 if (cur->URI == NULL)
941 goto error;
942 }
943 return(cur);
944
945 error:
946 xmlFreeEntity(cur);
947 return(NULL);
948 }
949
950 /**
951 * xmlCopyEntitiesTable:
952 * @table: An entity table
953 *
954 * Build a copy of an entity table.
955 *
956 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
957 */
958 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)959 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
960 return(xmlHashCopySafe(table, xmlCopyEntity, xmlFreeEntityWrapper));
961 }
962
963 #ifdef LIBXML_OUTPUT_ENABLED
964
965 /**
966 * xmlDumpEntityDecl:
967 * @buf: An XML buffer.
968 * @ent: An entity table
969 *
970 * This will dump the content of the entity table as an XML DTD definition
971 */
972 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)973 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
974 xmlSaveCtxtPtr save;
975
976 if ((buf == NULL) || (ent == NULL))
977 return;
978
979 save = xmlSaveToBuffer(buf, NULL, 0);
980 xmlSaveTree(save, (xmlNodePtr) ent);
981 if (xmlSaveFinish(save) != XML_ERR_OK)
982 xmlFree(xmlBufferDetach(buf));
983 }
984
985 /**
986 * xmlDumpEntityDeclScan:
987 * @ent: An entity table
988 * @buf: An XML buffer.
989 *
990 * When using the hash table scan function, arguments need to be reversed
991 */
992 static void
xmlDumpEntityDeclScan(void * ent,void * save,const xmlChar * name ATTRIBUTE_UNUSED)993 xmlDumpEntityDeclScan(void *ent, void *save,
994 const xmlChar *name ATTRIBUTE_UNUSED) {
995 xmlSaveTree(save, ent);
996 }
997
998 /**
999 * xmlDumpEntitiesTable:
1000 * @buf: An XML buffer.
1001 * @table: An entity table
1002 *
1003 * This will dump the content of the entity table as an XML DTD definition
1004 */
1005 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)1006 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1007 xmlSaveCtxtPtr save;
1008
1009 if ((buf == NULL) || (table == NULL))
1010 return;
1011
1012 save = xmlSaveToBuffer(buf, NULL, 0);
1013 xmlHashScan(table, xmlDumpEntityDeclScan, save);
1014 if (xmlSaveFinish(save) != XML_ERR_OK)
1015 xmlFree(xmlBufferDetach(buf));
1016 }
1017 #endif /* LIBXML_OUTPUT_ENABLED */
1018