1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23 #define IN_LIBXML
24 #include "libxml.h"
25
26 #include <string.h>
27 #include <limits.h>
28
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47
48 #include "buf.h"
49 #include "enc.h"
50
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
57 const char *name;
58 const char *alias;
59 };
60
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
64
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74
75 static int xmlLittleEndian = 1;
76
77 /**
78 * xmlEncodingErrMemory:
79 * @extra: extra information
80 *
81 * Handle an out of memory condition
82 */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88
89 /**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->pivot_source = conv->pivot_buf;
114 conv->pivot_target = conv->pivot_buf;
115
116 conv->uconv = ucnv_open(name, &status);
117 if (U_FAILURE(status))
118 goto error;
119
120 status = U_ZERO_ERROR;
121 if (toUnicode) {
122 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123 NULL, NULL, NULL, &status);
124 }
125 else {
126 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127 NULL, NULL, NULL, &status);
128 }
129 if (U_FAILURE(status))
130 goto error;
131
132 status = U_ZERO_ERROR;
133 conv->utf8 = ucnv_open("UTF-8", &status);
134 if (U_SUCCESS(status))
135 return conv;
136
137 error:
138 if (conv->uconv)
139 ucnv_close(conv->uconv);
140 xmlFree(conv);
141 return NULL;
142 }
143
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147 if (conv != NULL) {
148 ucnv_close(conv->uconv);
149 ucnv_close(conv->utf8);
150 xmlFree(conv);
151 }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154
155 /************************************************************************
156 * *
157 * Conversions To/From UTF8 encoding *
158 * *
159 ************************************************************************/
160
161 /**
162 * asciiToUTF8:
163 * @out: a pointer to an array of bytes to store the result
164 * @outlen: the length of @out
165 * @in: a pointer to an array of ASCII chars
166 * @inlen: the length of @in
167 *
168 * Take a block of ASCII chars in and try to convert it to an UTF-8
169 * block of chars out.
170 * Returns 0 if success, or -1 otherwise
171 * The value of @inlen after return is the number of octets consumed
172 * if the return value is positive, else unpredictable.
173 * The value of @outlen after return is the number of octets produced.
174 */
175 static int
asciiToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)176 asciiToUTF8(unsigned char* out, int *outlen,
177 const unsigned char* in, int *inlen) {
178 unsigned char* outstart = out;
179 const unsigned char* base = in;
180 const unsigned char* processed = in;
181 unsigned char* outend = out + *outlen;
182 const unsigned char* inend;
183 unsigned int c;
184
185 inend = in + (*inlen);
186 while ((in < inend) && (out - outstart + 5 < *outlen)) {
187 c= *in++;
188
189 if (out >= outend)
190 break;
191 if (c < 0x80) {
192 *out++ = c;
193 } else {
194 *outlen = out - outstart;
195 *inlen = processed - base;
196 return(-1);
197 }
198
199 processed = (const unsigned char*) in;
200 }
201 *outlen = out - outstart;
202 *inlen = processed - base;
203 return(*outlen);
204 }
205
206 #ifdef LIBXML_OUTPUT_ENABLED
207 /**
208 * UTF8Toascii:
209 * @out: a pointer to an array of bytes to store the result
210 * @outlen: the length of @out
211 * @in: a pointer to an array of UTF-8 chars
212 * @inlen: the length of @in
213 *
214 * Take a block of UTF-8 chars in and try to convert it to an ASCII
215 * block of chars out.
216 *
217 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
218 * The value of @inlen after return is the number of octets consumed
219 * if the return value is positive, else unpredictable.
220 * The value of @outlen after return is the number of octets produced.
221 */
222 static int
UTF8Toascii(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)223 UTF8Toascii(unsigned char* out, int *outlen,
224 const unsigned char* in, int *inlen) {
225 const unsigned char* processed = in;
226 const unsigned char* outend;
227 const unsigned char* outstart = out;
228 const unsigned char* instart = in;
229 const unsigned char* inend;
230 unsigned int c, d;
231 int trailing;
232
233 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
234 if (in == NULL) {
235 /*
236 * initialization nothing to do
237 */
238 *outlen = 0;
239 *inlen = 0;
240 return(0);
241 }
242 inend = in + (*inlen);
243 outend = out + (*outlen);
244 while (in < inend) {
245 d = *in++;
246 if (d < 0x80) { c= d; trailing= 0; }
247 else if (d < 0xC0) {
248 /* trailing byte in leading position */
249 *outlen = out - outstart;
250 *inlen = processed - instart;
251 return(-2);
252 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
253 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
254 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
255 else {
256 /* no chance for this in Ascii */
257 *outlen = out - outstart;
258 *inlen = processed - instart;
259 return(-2);
260 }
261
262 if (inend - in < trailing) {
263 break;
264 }
265
266 for ( ; trailing; trailing--) {
267 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
268 break;
269 c <<= 6;
270 c |= d & 0x3F;
271 }
272
273 /* assertion: c is a single UTF-4 value */
274 if (c < 0x80) {
275 if (out >= outend)
276 break;
277 *out++ = c;
278 } else {
279 /* no chance for this in Ascii */
280 *outlen = out - outstart;
281 *inlen = processed - instart;
282 return(-2);
283 }
284 processed = in;
285 }
286 *outlen = out - outstart;
287 *inlen = processed - instart;
288 return(*outlen);
289 }
290 #endif /* LIBXML_OUTPUT_ENABLED */
291
292 /**
293 * isolat1ToUTF8:
294 * @out: a pointer to an array of bytes to store the result
295 * @outlen: the length of @out
296 * @in: a pointer to an array of ISO Latin 1 chars
297 * @inlen: the length of @in
298 *
299 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
300 * block of chars out.
301 * Returns the number of bytes written if success, or -1 otherwise
302 * The value of @inlen after return is the number of octets consumed
303 * if the return value is positive, else unpredictable.
304 * The value of @outlen after return is the number of octets produced.
305 */
306 int
isolat1ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)307 isolat1ToUTF8(unsigned char* out, int *outlen,
308 const unsigned char* in, int *inlen) {
309 unsigned char* outstart = out;
310 const unsigned char* base = in;
311 unsigned char* outend;
312 const unsigned char* inend;
313 const unsigned char* instop;
314
315 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
316 return(-1);
317
318 outend = out + *outlen;
319 inend = in + (*inlen);
320 instop = inend;
321
322 while ((in < inend) && (out < outend - 1)) {
323 if (*in >= 0x80) {
324 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
325 *out++ = ((*in) & 0x3F) | 0x80;
326 ++in;
327 }
328 if ((instop - in) > (outend - out)) instop = in + (outend - out);
329 while ((in < instop) && (*in < 0x80)) {
330 *out++ = *in++;
331 }
332 }
333 if ((in < inend) && (out < outend) && (*in < 0x80)) {
334 *out++ = *in++;
335 }
336 *outlen = out - outstart;
337 *inlen = in - base;
338 return(*outlen);
339 }
340
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as both
 * buffers allow, without looking at their content.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or the usable length is negative.
 * The values of *outlen and *inlenb after return are both the number
 *     of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    /* inb == NULL means output is initialized. */
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy the smaller of the two lengths */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
387
388
389 #ifdef LIBXML_OUTPUT_ENABLED
390 /**
391 * UTF8Toisolat1:
392 * @out: a pointer to an array of bytes to store the result
393 * @outlen: the length of @out
394 * @in: a pointer to an array of UTF-8 chars
395 * @inlen: the length of @in
396 *
397 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
398 * block of chars out.
399 *
400 * Returns the number of bytes written if success, -2 if the transcoding fails,
401 or -1 otherwise
402 * The value of @inlen after return is the number of octets consumed
403 * if the return value is positive, else unpredictable.
404 * The value of @outlen after return is the number of octets produced.
405 */
406 int
UTF8Toisolat1(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)407 UTF8Toisolat1(unsigned char* out, int *outlen,
408 const unsigned char* in, int *inlen) {
409 const unsigned char* processed = in;
410 const unsigned char* outend;
411 const unsigned char* outstart = out;
412 const unsigned char* instart = in;
413 const unsigned char* inend;
414 unsigned int c, d;
415 int trailing;
416
417 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
418 if (in == NULL) {
419 /*
420 * initialization nothing to do
421 */
422 *outlen = 0;
423 *inlen = 0;
424 return(0);
425 }
426 inend = in + (*inlen);
427 outend = out + (*outlen);
428 while (in < inend) {
429 d = *in++;
430 if (d < 0x80) { c= d; trailing= 0; }
431 else if (d < 0xC0) {
432 /* trailing byte in leading position */
433 *outlen = out - outstart;
434 *inlen = processed - instart;
435 return(-2);
436 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
437 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
438 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
439 else {
440 /* no chance for this in IsoLat1 */
441 *outlen = out - outstart;
442 *inlen = processed - instart;
443 return(-2);
444 }
445
446 if (inend - in < trailing) {
447 break;
448 }
449
450 for ( ; trailing; trailing--) {
451 if (in >= inend)
452 break;
453 if (((d= *in++) & 0xC0) != 0x80) {
454 *outlen = out - outstart;
455 *inlen = processed - instart;
456 return(-2);
457 }
458 c <<= 6;
459 c |= d & 0x3F;
460 }
461
462 /* assertion: c is a single UTF-4 value */
463 if (c <= 0xFF) {
464 if (out >= outend)
465 break;
466 *out++ = c;
467 } else {
468 /* no chance for this in IsoLat1 */
469 *outlen = out - outstart;
470 *inlen = processed - instart;
471 return(-2);
472 }
473 processed = in;
474 }
475 *outlen = out - outstart;
476 *inlen = processed - instart;
477 return(*outlen);
478 }
479 #endif /* LIBXML_OUTPUT_ENABLED */
480
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes (an odd trailing byte is ignored)
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result does not depend on the byte order of the host nor on the
 * alignment of @inb.
 *
 * A character is only converted while more than 5 bytes remain free
 * in @out, which always leaves room for the longest (4 byte) sequence.
 * Conversion also stops, without error, when the input ends between
 * the two halves of a surrogate pair.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate, or a low
 *     surrogate on its own).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* little-endian: low byte first */
        c = in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            goto invalid;
        }
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value; the loop condition
         * guarantees room for the whole UTF-8 sequence */
        if      (c <    0x80) {  *out++=  c;                        bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(-2);
}
573
574 #ifdef LIBXML_OUTPUT_ENABLED
575 /**
576 * UTF8ToUTF16LE:
577 * @outb: a pointer to an array of bytes to store the result
578 * @outlen: the length of @outb
579 * @in: a pointer to an array of UTF-8 chars
580 * @inlen: the length of @in
581 *
582 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
583 * block of chars out.
584 *
585 * Returns the number of bytes written, or -1 if lack of space, or -2
586 * if the transcoding failed.
587 */
588 static int
UTF8ToUTF16LE(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)589 UTF8ToUTF16LE(unsigned char* outb, int *outlen,
590 const unsigned char* in, int *inlen)
591 {
592 unsigned short* out = (unsigned short*) outb;
593 const unsigned char* processed = in;
594 const unsigned char *const instart = in;
595 unsigned short* outstart= out;
596 unsigned short* outend;
597 const unsigned char* inend;
598 unsigned int c, d;
599 int trailing;
600 unsigned char *tmp;
601 unsigned short tmp1, tmp2;
602
603 /* UTF16LE encoding has no BOM */
604 if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
605 if (in == NULL) {
606 *outlen = 0;
607 *inlen = 0;
608 return(0);
609 }
610 inend= in + *inlen;
611 outend = out + (*outlen / 2);
612 while (in < inend) {
613 d= *in++;
614 if (d < 0x80) { c= d; trailing= 0; }
615 else if (d < 0xC0) {
616 /* trailing byte in leading position */
617 *outlen = (out - outstart) * 2;
618 *inlen = processed - instart;
619 return(-2);
620 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
621 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
622 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
623 else {
624 /* no chance for this in UTF-16 */
625 *outlen = (out - outstart) * 2;
626 *inlen = processed - instart;
627 return(-2);
628 }
629
630 if (inend - in < trailing) {
631 break;
632 }
633
634 for ( ; trailing; trailing--) {
635 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
636 break;
637 c <<= 6;
638 c |= d & 0x3F;
639 }
640
641 /* assertion: c is a single UTF-4 value */
642 if (c < 0x10000) {
643 if (out >= outend)
644 break;
645 if (xmlLittleEndian) {
646 *out++ = c;
647 } else {
648 tmp = (unsigned char *) out;
649 *tmp = c ;
650 *(tmp + 1) = c >> 8 ;
651 out++;
652 }
653 }
654 else if (c < 0x110000) {
655 if (out+1 >= outend)
656 break;
657 c -= 0x10000;
658 if (xmlLittleEndian) {
659 *out++ = 0xD800 | (c >> 10);
660 *out++ = 0xDC00 | (c & 0x03FF);
661 } else {
662 tmp1 = 0xD800 | (c >> 10);
663 tmp = (unsigned char *) out;
664 *tmp = (unsigned char) tmp1;
665 *(tmp + 1) = tmp1 >> 8;
666 out++;
667
668 tmp2 = 0xDC00 | (c & 0x03FF);
669 tmp = (unsigned char *) out;
670 *tmp = (unsigned char) tmp2;
671 *(tmp + 1) = tmp2 >> 8;
672 out++;
673 }
674 }
675 else
676 break;
677 processed = in;
678 }
679 *outlen = (out - outstart) * 2;
680 *inlen = processed - instart;
681 return(*outlen);
682 }
683
684 /**
685 * UTF8ToUTF16:
686 * @outb: a pointer to an array of bytes to store the result
687 * @outlen: the length of @outb
688 * @in: a pointer to an array of UTF-8 chars
689 * @inlen: the length of @in
690 *
691 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
692 * block of chars out.
693 *
694 * Returns the number of bytes written, or -1 if lack of space, or -2
695 * if the transcoding failed.
696 */
697 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)698 UTF8ToUTF16(unsigned char* outb, int *outlen,
699 const unsigned char* in, int *inlen)
700 {
701 if (in == NULL) {
702 /*
703 * initialization, add the Byte Order Mark for UTF-16LE
704 */
705 if (*outlen >= 2) {
706 outb[0] = 0xFF;
707 outb[1] = 0xFE;
708 *outlen = 2;
709 *inlen = 0;
710 #ifdef DEBUG_ENCODING
711 xmlGenericError(xmlGenericErrorContext,
712 "Added FFFE Byte Order Mark\n");
713 #endif
714 return(2);
715 }
716 *outlen = 0;
717 *inlen = 0;
718 return(0);
719 }
720 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
721 }
722 #endif /* LIBXML_OUTPUT_ENABLED */
723
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes (an odd trailing byte is ignored)
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so the
 * result does not depend on the byte order of the host nor on the
 * alignment of @inb.
 *
 * A character is only converted while more than 5 bytes remain free
 * in @out, which always leaves room for the longest (4 byte) sequence.
 * Conversion also stops, without error, when the input ends between
 * the two halves of a surrogate pair.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate not followed by a low surrogate, or a low
 *     surrogate on its own).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        /* big-endian: high byte first */
        c = ((unsigned int) in[0] << 8) | in[1];
        in += 2;

        if ((c & 0xFC00) == 0xDC00) {
            /* low surrogate without a preceding high surrogate */
            goto invalid;
        }
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00)
                goto invalid;
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value; the loop condition
         * guarantees room for the whole UTF-8 sequence */
        if      (c <    0x80) {  *out++=  c;                        bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6)
            *out++= ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(-2);
}
816
817 #ifdef LIBXML_OUTPUT_ENABLED
818 /**
819 * UTF8ToUTF16BE:
820 * @outb: a pointer to an array of bytes to store the result
821 * @outlen: the length of @outb
822 * @in: a pointer to an array of UTF-8 chars
823 * @inlen: the length of @in
824 *
825 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
826 * block of chars out.
827 *
828 * Returns the number of byte written, or -1 by lack of space, or -2
829 * if the transcoding failed.
830 */
831 static int
UTF8ToUTF16BE(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)832 UTF8ToUTF16BE(unsigned char* outb, int *outlen,
833 const unsigned char* in, int *inlen)
834 {
835 unsigned short* out = (unsigned short*) outb;
836 const unsigned char* processed = in;
837 const unsigned char *const instart = in;
838 unsigned short* outstart= out;
839 unsigned short* outend;
840 const unsigned char* inend;
841 unsigned int c, d;
842 int trailing;
843 unsigned char *tmp;
844 unsigned short tmp1, tmp2;
845
846 /* UTF-16BE has no BOM */
847 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
848 if (in == NULL) {
849 *outlen = 0;
850 *inlen = 0;
851 return(0);
852 }
853 inend= in + *inlen;
854 outend = out + (*outlen / 2);
855 while (in < inend) {
856 d= *in++;
857 if (d < 0x80) { c= d; trailing= 0; }
858 else if (d < 0xC0) {
859 /* trailing byte in leading position */
860 *outlen = out - outstart;
861 *inlen = processed - instart;
862 return(-2);
863 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
864 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
865 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
866 else {
867 /* no chance for this in UTF-16 */
868 *outlen = out - outstart;
869 *inlen = processed - instart;
870 return(-2);
871 }
872
873 if (inend - in < trailing) {
874 break;
875 }
876
877 for ( ; trailing; trailing--) {
878 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
879 c <<= 6;
880 c |= d & 0x3F;
881 }
882
883 /* assertion: c is a single UTF-4 value */
884 if (c < 0x10000) {
885 if (out >= outend) break;
886 if (xmlLittleEndian) {
887 tmp = (unsigned char *) out;
888 *tmp = c >> 8;
889 *(tmp + 1) = c;
890 out++;
891 } else {
892 *out++ = c;
893 }
894 }
895 else if (c < 0x110000) {
896 if (out+1 >= outend) break;
897 c -= 0x10000;
898 if (xmlLittleEndian) {
899 tmp1 = 0xD800 | (c >> 10);
900 tmp = (unsigned char *) out;
901 *tmp = tmp1 >> 8;
902 *(tmp + 1) = (unsigned char) tmp1;
903 out++;
904
905 tmp2 = 0xDC00 | (c & 0x03FF);
906 tmp = (unsigned char *) out;
907 *tmp = tmp2 >> 8;
908 *(tmp + 1) = (unsigned char) tmp2;
909 out++;
910 } else {
911 *out++ = 0xD800 | (c >> 10);
912 *out++ = 0xDC00 | (c & 0x03FF);
913 }
914 }
915 else
916 break;
917 processed = in;
918 }
919 *outlen = (out - outstart) * 2;
920 *inlen = processed - instart;
921 return(*outlen);
922 }
923 #endif /* LIBXML_OUTPUT_ENABLED */
924
925 /************************************************************************
926 * *
927 * Generic encoding handling routines *
928 * *
929 ************************************************************************/
930
931 /**
932 * xmlDetectCharEncoding:
933 * @in: a pointer to the first bytes of the XML entity, must be at least
934 * 2 bytes long (at least 4 if encoding is UTF4 variant).
935 * @len: pointer to the length of the buffer
936 *
937 * Guess the encoding of the entity using the first bytes of the entity content
938 * according to the non-normative appendix F of the XML-1.0 recommendation.
939 *
940 * Returns one of the XML_CHAR_ENCODING_... values.
941 */
942 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)943 xmlDetectCharEncoding(const unsigned char* in, int len)
944 {
945 if (in == NULL)
946 return(XML_CHAR_ENCODING_NONE);
947 if (len >= 4) {
948 if ((in[0] == 0x00) && (in[1] == 0x00) &&
949 (in[2] == 0x00) && (in[3] == 0x3C))
950 return(XML_CHAR_ENCODING_UCS4BE);
951 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 (in[2] == 0x00) && (in[3] == 0x00))
953 return(XML_CHAR_ENCODING_UCS4LE);
954 if ((in[0] == 0x00) && (in[1] == 0x00) &&
955 (in[2] == 0x3C) && (in[3] == 0x00))
956 return(XML_CHAR_ENCODING_UCS4_2143);
957 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
958 (in[2] == 0x00) && (in[3] == 0x00))
959 return(XML_CHAR_ENCODING_UCS4_3412);
960 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
961 (in[2] == 0xA7) && (in[3] == 0x94))
962 return(XML_CHAR_ENCODING_EBCDIC);
963 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
964 (in[2] == 0x78) && (in[3] == 0x6D))
965 return(XML_CHAR_ENCODING_UTF8);
966 /*
967 * Although not part of the recommendation, we also
968 * attempt an "auto-recognition" of UTF-16LE and
969 * UTF-16BE encodings.
970 */
971 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
972 (in[2] == 0x3F) && (in[3] == 0x00))
973 return(XML_CHAR_ENCODING_UTF16LE);
974 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
975 (in[2] == 0x00) && (in[3] == 0x3F))
976 return(XML_CHAR_ENCODING_UTF16BE);
977 }
978 if (len >= 3) {
979 /*
980 * Errata on XML-1.0 June 20 2001
981 * We now allow an UTF8 encoded BOM
982 */
983 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
984 (in[2] == 0xBF))
985 return(XML_CHAR_ENCODING_UTF8);
986 }
987 /* For UTF-16 we can recognize by the BOM */
988 if (len >= 2) {
989 if ((in[0] == 0xFE) && (in[1] == 0xFF))
990 return(XML_CHAR_ENCODING_UTF16BE);
991 if ((in[0] == 0xFF) && (in[1] == 0xFE))
992 return(XML_CHAR_ENCODING_UTF16LE);
993 }
994 return(XML_CHAR_ENCODING_NONE);
995 }
996
997 /**
998 * xmlCleanupEncodingAliases:
999 *
1000 * Unregisters all aliases
1001 */
1002 void
xmlCleanupEncodingAliases(void)1003 xmlCleanupEncodingAliases(void) {
1004 int i;
1005
1006 if (xmlCharEncodingAliases == NULL)
1007 return;
1008
1009 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010 if (xmlCharEncodingAliases[i].name != NULL)
1011 xmlFree((char *) xmlCharEncodingAliases[i].name);
1012 if (xmlCharEncodingAliases[i].alias != NULL)
1013 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1014 }
1015 xmlCharEncodingAliasesNb = 0;
1016 xmlCharEncodingAliasesMax = 0;
1017 xmlFree(xmlCharEncodingAliases);
1018 xmlCharEncodingAliases = NULL;
1019 }
1020
1021 /**
1022 * xmlGetEncodingAlias:
1023 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1024 *
1025 * Lookup an encoding name for the given alias.
1026 *
1027 * Returns NULL if not found, otherwise the original name
1028 */
1029 const char *
xmlGetEncodingAlias(const char * alias)1030 xmlGetEncodingAlias(const char *alias) {
1031 int i;
1032 char upper[100];
1033
1034 if (alias == NULL)
1035 return(NULL);
1036
1037 if (xmlCharEncodingAliases == NULL)
1038 return(NULL);
1039
1040 for (i = 0;i < 99;i++) {
1041 upper[i] = toupper(alias[i]);
1042 if (upper[i] == 0) break;
1043 }
1044 upper[i] = 0;
1045
1046 /*
1047 * Walk down the list looking for a definition of the alias
1048 */
1049 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051 return(xmlCharEncodingAliases[i].name);
1052 }
1053 }
1054 return(NULL);
1055 }
1056
1057 /**
1058 * xmlAddEncodingAlias:
1059 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1060 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1061 *
1062 * Registers an alias @alias for an encoding named @name. Existing alias
1063 * will be overwritten.
1064 *
1065 * Returns 0 in case of success, -1 in case of error
1066 */
1067 int
xmlAddEncodingAlias(const char * name,const char * alias)1068 xmlAddEncodingAlias(const char *name, const char *alias) {
1069 int i;
1070 char upper[100];
1071
1072 if ((name == NULL) || (alias == NULL))
1073 return(-1);
1074
1075 for (i = 0;i < 99;i++) {
1076 upper[i] = toupper(alias[i]);
1077 if (upper[i] == 0) break;
1078 }
1079 upper[i] = 0;
1080
1081 if (xmlCharEncodingAliases == NULL) {
1082 xmlCharEncodingAliasesNb = 0;
1083 xmlCharEncodingAliasesMax = 20;
1084 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086 if (xmlCharEncodingAliases == NULL)
1087 return(-1);
1088 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089 xmlCharEncodingAliasesMax *= 2;
1090 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091 xmlRealloc(xmlCharEncodingAliases,
1092 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1093 }
1094 /*
1095 * Walk down the list looking for a definition of the alias
1096 */
1097 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1099 /*
1100 * Replace the definition.
1101 */
1102 xmlFree((char *) xmlCharEncodingAliases[i].name);
1103 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104 return(0);
1105 }
1106 }
1107 /*
1108 * Add the definition
1109 */
1110 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112 xmlCharEncodingAliasesNb++;
1113 return(0);
1114 }
1115
1116 /**
1117 * xmlDelEncodingAlias:
1118 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1119 *
1120 * Unregisters an encoding alias @alias
1121 *
1122 * Returns 0 in case of success, -1 in case of error
1123 */
1124 int
xmlDelEncodingAlias(const char * alias)1125 xmlDelEncodingAlias(const char *alias) {
1126 int i;
1127
1128 if (alias == NULL)
1129 return(-1);
1130
1131 if (xmlCharEncodingAliases == NULL)
1132 return(-1);
1133 /*
1134 * Walk down the list looking for a definition of the alias
1135 */
1136 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138 xmlFree((char *) xmlCharEncodingAliases[i].name);
1139 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140 xmlCharEncodingAliasesNb--;
1141 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143 return(0);
1144 }
1145 }
1146 return(-1);
1147 }
1148
1149 /**
1150 * xmlParseCharEncoding:
1151 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1152 *
1153 * Compare the string to the encoding schemes already known. Note
1154 * that the comparison is case insensitive accordingly to the section
1155 * [XML] 4.3.3 Character Encoding in Entities.
1156 *
1157 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158 * if not recognized.
1159 */
1160 xmlCharEncoding
xmlParseCharEncoding(const char * name)1161 xmlParseCharEncoding(const char* name)
1162 {
1163 const char *alias;
1164 char upper[500];
1165 int i;
1166
1167 if (name == NULL)
1168 return(XML_CHAR_ENCODING_NONE);
1169
1170 /*
1171 * Do the alias resolution
1172 */
1173 alias = xmlGetEncodingAlias(name);
1174 if (alias != NULL)
1175 name = alias;
1176
1177 for (i = 0;i < 499;i++) {
1178 upper[i] = toupper(name[i]);
1179 if (upper[i] == 0) break;
1180 }
1181 upper[i] = 0;
1182
1183 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1186
1187 /*
1188 * NOTE: if we were able to parse this, the endianness of UTF16 is
1189 * already found and in use
1190 */
1191 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1193
1194 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1197
1198 /*
1199 * NOTE: if we were able to parse this, the endianness of UCS4 is
1200 * already found and in use
1201 */
1202 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1205
1206
1207 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1210
1211 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1214
1215 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1222
1223 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1226
1227 #ifdef DEBUG_ENCODING
1228 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229 #endif
1230 return(XML_CHAR_ENCODING_ERROR);
1231 }
1232
1233 /**
1234 * xmlGetCharEncodingName:
1235 * @enc: the encoding
1236 *
1237 * The "canonical" name for XML encoding.
1238 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1239 * Section 4.3.3 Character Encoding in Entities
1240 *
1241 * Returns the canonical name for the given encoding
1242 */
1243
1244 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1245 xmlGetCharEncodingName(xmlCharEncoding enc) {
1246 switch (enc) {
1247 case XML_CHAR_ENCODING_ERROR:
1248 return(NULL);
1249 case XML_CHAR_ENCODING_NONE:
1250 return(NULL);
1251 case XML_CHAR_ENCODING_UTF8:
1252 return("UTF-8");
1253 case XML_CHAR_ENCODING_UTF16LE:
1254 return("UTF-16");
1255 case XML_CHAR_ENCODING_UTF16BE:
1256 return("UTF-16");
1257 case XML_CHAR_ENCODING_EBCDIC:
1258 return("EBCDIC");
1259 case XML_CHAR_ENCODING_UCS4LE:
1260 return("ISO-10646-UCS-4");
1261 case XML_CHAR_ENCODING_UCS4BE:
1262 return("ISO-10646-UCS-4");
1263 case XML_CHAR_ENCODING_UCS4_2143:
1264 return("ISO-10646-UCS-4");
1265 case XML_CHAR_ENCODING_UCS4_3412:
1266 return("ISO-10646-UCS-4");
1267 case XML_CHAR_ENCODING_UCS2:
1268 return("ISO-10646-UCS-2");
1269 case XML_CHAR_ENCODING_8859_1:
1270 return("ISO-8859-1");
1271 case XML_CHAR_ENCODING_8859_2:
1272 return("ISO-8859-2");
1273 case XML_CHAR_ENCODING_8859_3:
1274 return("ISO-8859-3");
1275 case XML_CHAR_ENCODING_8859_4:
1276 return("ISO-8859-4");
1277 case XML_CHAR_ENCODING_8859_5:
1278 return("ISO-8859-5");
1279 case XML_CHAR_ENCODING_8859_6:
1280 return("ISO-8859-6");
1281 case XML_CHAR_ENCODING_8859_7:
1282 return("ISO-8859-7");
1283 case XML_CHAR_ENCODING_8859_8:
1284 return("ISO-8859-8");
1285 case XML_CHAR_ENCODING_8859_9:
1286 return("ISO-8859-9");
1287 case XML_CHAR_ENCODING_2022_JP:
1288 return("ISO-2022-JP");
1289 case XML_CHAR_ENCODING_SHIFT_JIS:
1290 return("Shift-JIS");
1291 case XML_CHAR_ENCODING_EUC_JP:
1292 return("EUC-JP");
1293 case XML_CHAR_ENCODING_ASCII:
1294 return(NULL);
1295 }
1296 return(NULL);
1297 }
1298
1299 /************************************************************************
1300 * *
1301 * Char encoding handlers *
1302 * *
1303 ************************************************************************/
1304
1305
/*
 * Registry of the encoding handlers: an array of MAX_ENCODING_HANDLERS
 * slots allocated by xmlInitCharEncodingHandlers(), of which the first
 * nbCharEncodingHandler entries are in use.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1317
1318 /**
1319 * xmlNewCharEncodingHandler:
1320 * @name: the encoding name, in UTF-8 format (ASCII actually)
1321 * @input: the xmlCharEncodingInputFunc to read that encoding
1322 * @output: the xmlCharEncodingOutputFunc to write that encoding
1323 *
1324 * Create and registers an xmlCharEncodingHandler.
1325 *
1326 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1327 */
1328 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1329 xmlNewCharEncodingHandler(const char *name,
1330 xmlCharEncodingInputFunc input,
1331 xmlCharEncodingOutputFunc output) {
1332 xmlCharEncodingHandlerPtr handler;
1333 const char *alias;
1334 char upper[500];
1335 int i;
1336 char *up = NULL;
1337
1338 /*
1339 * Do the alias resolution
1340 */
1341 alias = xmlGetEncodingAlias(name);
1342 if (alias != NULL)
1343 name = alias;
1344
1345 /*
1346 * Keep only the uppercase version of the encoding.
1347 */
1348 if (name == NULL) {
1349 xmlEncodingErr(XML_I18N_NO_NAME,
1350 "xmlNewCharEncodingHandler : no name !\n", NULL);
1351 return(NULL);
1352 }
1353 for (i = 0;i < 499;i++) {
1354 upper[i] = toupper(name[i]);
1355 if (upper[i] == 0) break;
1356 }
1357 upper[i] = 0;
1358 up = xmlMemStrdup(upper);
1359 if (up == NULL) {
1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 return(NULL);
1362 }
1363
1364 /*
1365 * allocate and fill-up an handler block.
1366 */
1367 handler = (xmlCharEncodingHandlerPtr)
1368 xmlMalloc(sizeof(xmlCharEncodingHandler));
1369 if (handler == NULL) {
1370 xmlFree(up);
1371 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372 return(NULL);
1373 }
1374 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375 handler->input = input;
1376 handler->output = output;
1377 handler->name = up;
1378
1379 #ifdef LIBXML_ICONV_ENABLED
1380 handler->iconv_in = NULL;
1381 handler->iconv_out = NULL;
1382 #endif
1383 #ifdef LIBXML_ICU_ENABLED
1384 handler->uconv_in = NULL;
1385 handler->uconv_out = NULL;
1386 #endif
1387
1388 /*
1389 * registers and returns the handler.
1390 */
1391 xmlRegisterCharEncodingHandler(handler);
1392 #ifdef DEBUG_ENCODING
1393 xmlGenericError(xmlGenericErrorContext,
1394 "Registered encoding handler for %s\n", name);
1395 #endif
1396 return(handler);
1397 }
1398
1399 /**
1400 * xmlInitCharEncodingHandlers:
1401 *
1402 * Initialize the char encoding support, it registers the default
1403 * encoding supported.
1404 * NOTE: while public, this function usually doesn't need to be called
1405 * in normal processing.
1406 */
1407 void
xmlInitCharEncodingHandlers(void)1408 xmlInitCharEncodingHandlers(void) {
1409 unsigned short int tst = 0x1234;
1410 unsigned char *ptr = (unsigned char *) &tst;
1411
1412 if (handlers != NULL) return;
1413
1414 handlers = (xmlCharEncodingHandlerPtr *)
1415 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1416
1417 if (*ptr == 0x12) xmlLittleEndian = 0;
1418 else if (*ptr == 0x34) xmlLittleEndian = 1;
1419 else {
1420 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421 "Odd problem at endianness detection\n", NULL);
1422 }
1423
1424 if (handlers == NULL) {
1425 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426 return;
1427 }
1428 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429 #ifdef LIBXML_OUTPUT_ENABLED
1430 xmlUTF16LEHandler =
1431 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432 xmlUTF16BEHandler =
1433 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438 #ifdef LIBXML_HTML_ENABLED
1439 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440 #endif
1441 #else
1442 xmlUTF16LEHandler =
1443 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444 xmlUTF16BEHandler =
1445 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450 #endif /* LIBXML_OUTPUT_ENABLED */
1451 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452 #ifdef LIBXML_ISO8859X_ENABLED
1453 xmlRegisterCharEncodingHandlersISO8859x ();
1454 #endif
1455 #endif
1456
1457 }
1458
1459 /**
1460 * xmlCleanupCharEncodingHandlers:
1461 *
1462 * Cleanup the memory allocated for the char encoding support, it
1463 * unregisters all the encoding handlers and the aliases.
1464 */
1465 void
xmlCleanupCharEncodingHandlers(void)1466 xmlCleanupCharEncodingHandlers(void) {
1467 xmlCleanupEncodingAliases();
1468
1469 if (handlers == NULL) return;
1470
1471 for (;nbCharEncodingHandler > 0;) {
1472 nbCharEncodingHandler--;
1473 if (handlers[nbCharEncodingHandler] != NULL) {
1474 if (handlers[nbCharEncodingHandler]->name != NULL)
1475 xmlFree(handlers[nbCharEncodingHandler]->name);
1476 xmlFree(handlers[nbCharEncodingHandler]);
1477 }
1478 }
1479 xmlFree(handlers);
1480 handlers = NULL;
1481 nbCharEncodingHandler = 0;
1482 xmlDefaultCharEncodingHandler = NULL;
1483 }
1484
1485 /**
1486 * xmlRegisterCharEncodingHandler:
1487 * @handler: the xmlCharEncodingHandlerPtr handler block
1488 *
1489 * Register the char encoding handler, surprising, isn't it ?
1490 */
1491 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1492 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493 if (handlers == NULL) xmlInitCharEncodingHandlers();
1494 if ((handler == NULL) || (handlers == NULL)) {
1495 xmlEncodingErr(XML_I18N_NO_HANDLER,
1496 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497 goto free_handler;
1498 }
1499
1500 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503 "MAX_ENCODING_HANDLERS");
1504 goto free_handler;
1505 }
1506 handlers[nbCharEncodingHandler++] = handler;
1507 return;
1508
1509 free_handler:
1510 if (handler != NULL) {
1511 if (handler->name != NULL) {
1512 xmlFree(handler->name);
1513 }
1514 xmlFree(handler);
1515 }
1516 }
1517
1518 /**
1519 * xmlGetCharEncodingHandler:
1520 * @enc: an xmlCharEncoding value.
1521 *
1522 * Search in the registered set the handler able to read/write that encoding.
1523 *
1524 * Returns the handler or NULL if not found
1525 */
1526 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1527 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528 xmlCharEncodingHandlerPtr handler;
1529
1530 if (handlers == NULL) xmlInitCharEncodingHandlers();
1531 switch (enc) {
1532 case XML_CHAR_ENCODING_ERROR:
1533 return(NULL);
1534 case XML_CHAR_ENCODING_NONE:
1535 return(NULL);
1536 case XML_CHAR_ENCODING_UTF8:
1537 return(NULL);
1538 case XML_CHAR_ENCODING_UTF16LE:
1539 return(xmlUTF16LEHandler);
1540 case XML_CHAR_ENCODING_UTF16BE:
1541 return(xmlUTF16BEHandler);
1542 case XML_CHAR_ENCODING_EBCDIC:
1543 handler = xmlFindCharEncodingHandler("EBCDIC");
1544 if (handler != NULL) return(handler);
1545 handler = xmlFindCharEncodingHandler("ebcdic");
1546 if (handler != NULL) return(handler);
1547 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548 if (handler != NULL) return(handler);
1549 handler = xmlFindCharEncodingHandler("IBM-037");
1550 if (handler != NULL) return(handler);
1551 break;
1552 case XML_CHAR_ENCODING_UCS4BE:
1553 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554 if (handler != NULL) return(handler);
1555 handler = xmlFindCharEncodingHandler("UCS-4");
1556 if (handler != NULL) return(handler);
1557 handler = xmlFindCharEncodingHandler("UCS4");
1558 if (handler != NULL) return(handler);
1559 break;
1560 case XML_CHAR_ENCODING_UCS4LE:
1561 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562 if (handler != NULL) return(handler);
1563 handler = xmlFindCharEncodingHandler("UCS-4");
1564 if (handler != NULL) return(handler);
1565 handler = xmlFindCharEncodingHandler("UCS4");
1566 if (handler != NULL) return(handler);
1567 break;
1568 case XML_CHAR_ENCODING_UCS4_2143:
1569 break;
1570 case XML_CHAR_ENCODING_UCS4_3412:
1571 break;
1572 case XML_CHAR_ENCODING_UCS2:
1573 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574 if (handler != NULL) return(handler);
1575 handler = xmlFindCharEncodingHandler("UCS-2");
1576 if (handler != NULL) return(handler);
1577 handler = xmlFindCharEncodingHandler("UCS2");
1578 if (handler != NULL) return(handler);
1579 break;
1580
1581 /*
1582 * We used to keep ISO Latin encodings native in the
1583 * generated data. This led to so many problems that
1584 * this has been removed. One can still change this
1585 * back by registering no-ops encoders for those
1586 */
1587 case XML_CHAR_ENCODING_8859_1:
1588 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589 if (handler != NULL) return(handler);
1590 break;
1591 case XML_CHAR_ENCODING_8859_2:
1592 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593 if (handler != NULL) return(handler);
1594 break;
1595 case XML_CHAR_ENCODING_8859_3:
1596 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597 if (handler != NULL) return(handler);
1598 break;
1599 case XML_CHAR_ENCODING_8859_4:
1600 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601 if (handler != NULL) return(handler);
1602 break;
1603 case XML_CHAR_ENCODING_8859_5:
1604 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605 if (handler != NULL) return(handler);
1606 break;
1607 case XML_CHAR_ENCODING_8859_6:
1608 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609 if (handler != NULL) return(handler);
1610 break;
1611 case XML_CHAR_ENCODING_8859_7:
1612 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613 if (handler != NULL) return(handler);
1614 break;
1615 case XML_CHAR_ENCODING_8859_8:
1616 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617 if (handler != NULL) return(handler);
1618 break;
1619 case XML_CHAR_ENCODING_8859_9:
1620 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621 if (handler != NULL) return(handler);
1622 break;
1623
1624
1625 case XML_CHAR_ENCODING_2022_JP:
1626 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627 if (handler != NULL) return(handler);
1628 break;
1629 case XML_CHAR_ENCODING_SHIFT_JIS:
1630 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631 if (handler != NULL) return(handler);
1632 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633 if (handler != NULL) return(handler);
1634 handler = xmlFindCharEncodingHandler("Shift_JIS");
1635 if (handler != NULL) return(handler);
1636 break;
1637 case XML_CHAR_ENCODING_EUC_JP:
1638 handler = xmlFindCharEncodingHandler("EUC-JP");
1639 if (handler != NULL) return(handler);
1640 break;
1641 default:
1642 break;
1643 }
1644
1645 #ifdef DEBUG_ENCODING
1646 xmlGenericError(xmlGenericErrorContext,
1647 "No handler found for encoding %d\n", enc);
1648 #endif
1649 return(NULL);
1650 }
1651
1652 /**
1653 * xmlFindCharEncodingHandler:
1654 * @name: a string describing the char encoding.
1655 *
1656 * Search in the registered set the handler able to read/write that encoding.
1657 *
1658 * Returns the handler or NULL if not found
1659 */
1660 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1661 xmlFindCharEncodingHandler(const char *name) {
1662 const char *nalias;
1663 const char *norig;
1664 xmlCharEncoding alias;
1665 #ifdef LIBXML_ICONV_ENABLED
1666 xmlCharEncodingHandlerPtr enc;
1667 iconv_t icv_in, icv_out;
1668 #endif /* LIBXML_ICONV_ENABLED */
1669 #ifdef LIBXML_ICU_ENABLED
1670 xmlCharEncodingHandlerPtr encu;
1671 uconv_t *ucv_in, *ucv_out;
1672 #endif /* LIBXML_ICU_ENABLED */
1673 char upper[100];
1674 int i;
1675
1676 if (handlers == NULL) xmlInitCharEncodingHandlers();
1677 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1679
1680 /*
1681 * Do the alias resolution
1682 */
1683 norig = name;
1684 nalias = xmlGetEncodingAlias(name);
1685 if (nalias != NULL)
1686 name = nalias;
1687
1688 /*
1689 * Check first for directly registered encoding names
1690 */
1691 for (i = 0;i < 99;i++) {
1692 upper[i] = toupper(name[i]);
1693 if (upper[i] == 0) break;
1694 }
1695 upper[i] = 0;
1696
1697 if (handlers != NULL) {
1698 for (i = 0;i < nbCharEncodingHandler; i++) {
1699 if (!strcmp(upper, handlers[i]->name)) {
1700 #ifdef DEBUG_ENCODING
1701 xmlGenericError(xmlGenericErrorContext,
1702 "Found registered handler for encoding %s\n", name);
1703 #endif
1704 return(handlers[i]);
1705 }
1706 }
1707 }
1708
1709 #ifdef LIBXML_ICONV_ENABLED
1710 /* check whether iconv can handle this */
1711 icv_in = iconv_open("UTF-8", name);
1712 icv_out = iconv_open(name, "UTF-8");
1713 if (icv_in == (iconv_t) -1) {
1714 icv_in = iconv_open("UTF-8", upper);
1715 }
1716 if (icv_out == (iconv_t) -1) {
1717 icv_out = iconv_open(upper, "UTF-8");
1718 }
1719 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720 enc = (xmlCharEncodingHandlerPtr)
1721 xmlMalloc(sizeof(xmlCharEncodingHandler));
1722 if (enc == NULL) {
1723 iconv_close(icv_in);
1724 iconv_close(icv_out);
1725 return(NULL);
1726 }
1727 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728 enc->name = xmlMemStrdup(name);
1729 enc->input = NULL;
1730 enc->output = NULL;
1731 enc->iconv_in = icv_in;
1732 enc->iconv_out = icv_out;
1733 #ifdef DEBUG_ENCODING
1734 xmlGenericError(xmlGenericErrorContext,
1735 "Found iconv handler for encoding %s\n", name);
1736 #endif
1737 return enc;
1738 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1739 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740 "iconv : problems with filters for '%s'\n", name);
1741 }
1742 #endif /* LIBXML_ICONV_ENABLED */
1743 #ifdef LIBXML_ICU_ENABLED
1744 /* check whether icu can handle this */
1745 ucv_in = openIcuConverter(name, 1);
1746 ucv_out = openIcuConverter(name, 0);
1747 if (ucv_in != NULL && ucv_out != NULL) {
1748 encu = (xmlCharEncodingHandlerPtr)
1749 xmlMalloc(sizeof(xmlCharEncodingHandler));
1750 if (encu == NULL) {
1751 closeIcuConverter(ucv_in);
1752 closeIcuConverter(ucv_out);
1753 return(NULL);
1754 }
1755 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1756 encu->name = xmlMemStrdup(name);
1757 encu->input = NULL;
1758 encu->output = NULL;
1759 encu->uconv_in = ucv_in;
1760 encu->uconv_out = ucv_out;
1761 #ifdef DEBUG_ENCODING
1762 xmlGenericError(xmlGenericErrorContext,
1763 "Found ICU converter handler for encoding %s\n", name);
1764 #endif
1765 return encu;
1766 } else if (ucv_in != NULL || ucv_out != NULL) {
1767 closeIcuConverter(ucv_in);
1768 closeIcuConverter(ucv_out);
1769 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1770 "ICU converter : problems with filters for '%s'\n", name);
1771 }
1772 #endif /* LIBXML_ICU_ENABLED */
1773
1774 #ifdef DEBUG_ENCODING
1775 xmlGenericError(xmlGenericErrorContext,
1776 "No handler found for encoding %s\n", name);
1777 #endif
1778
1779 /*
1780 * Fallback using the canonical names
1781 */
1782 alias = xmlParseCharEncoding(norig);
1783 if (alias != XML_CHAR_ENCODING_ERROR) {
1784 const char* canon;
1785 canon = xmlGetCharEncodingName(alias);
1786 if ((canon != NULL) && (strcmp(name, canon))) {
1787 return(xmlFindCharEncodingHandler(canon));
1788 }
1789 }
1790
1791 /* If "none of the above", give up */
1792 return(NULL);
1793 }
1794
1795 /************************************************************************
1796 * *
1797 * ICONV based generic conversion functions *
1798 * *
1799 ************************************************************************/
1800
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs a single iconv() call over the input and translates its outcome
 * into the return codes used by the encoding functions.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or when one of the pointers is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * On return @inlen holds the number of octets consumed and @outlen the
 * number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t inLeft, outLeft;
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    inLeft = *inlen;
    outLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &inLeft, &dst, &outLeft);
    /* iconv() leaves the remaining byte counts behind */
    *inlen -= inLeft;
    *outlen -= outLeft;

    if ((inLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL (truncated input sequence) and anything else */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1860
1861 /************************************************************************
1862 * *
1863 * ICU based generic conversion functions *
1864 * *
1865 ************************************************************************/
1866
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd:  ICU uconverter data structure
 * @toUnicode:  non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 * @flush:  if true, indicates end of input
 *
 * Converts between UTF-8 and the encoding of @cd with ucnv_convertEx(),
 * using the pivot buffer kept in @cd, and translates the ICU status
 * into the return codes used by the encoding functions.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or when one of the pointers is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char.
 *
 * On return @inlen holds the number of octets consumed and @outlen the
 * number of octets produced.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UConverter *target;
    UConverter *source;
    UErrorCode status = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        target = cd->utf8;
        source = cd->uconv;
    } else {
        /* UTF-8 => UTF-16 => encoding */
        target = cd->uconv;
        source = cd->utf8;
    }
    ucnv_convertEx(target, source, &dst, dst + *outlen,
                   &src, src + *inlen, cd->pivot_buf,
                   &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &status);

    /* the advanced pointers give the consumed / produced byte counts */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status)) {
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
        if (flush)
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
        return 0;
    }

    switch (status) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1928
1929 /************************************************************************
1930 * *
1931 * The real API used by libxml for on-the-fly conversion *
1932 * *
1933 ************************************************************************/
1934
1935 /**
1936 * xmlEncInputChunk:
1937 * @handler: encoding handler
1938 * @out: a pointer to an array of bytes to store the result
1939 * @outlen: the length of @out
1940 * @in: a pointer to an array of input bytes
1941 * @inlen: the length of @in
1942 * @flush: flush (ICU-related)
1943 *
1944 * Returns 0 if success, or
1945 * -1 by lack of space, or
1946 * -2 if the transcoding fails (for *in is not valid utf8 string or
1947 * the result of transformation can't fit into the encoding we want), or
1948 * -3 if there the last byte can't form a single output char.
1949 *
1950 * The value of @inlen after return is the number of octets consumed
1951 * as the return value is 0, else unpredictable.
1952 * The value of @outlen after return is the number of octets produced.
1953 */
1954 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1955 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1956 int *outlen, const unsigned char *in, int *inlen, int flush) {
1957 int ret;
1958 (void)flush;
1959
1960 if (handler->input != NULL) {
1961 ret = handler->input(out, outlen, in, inlen);
1962 if (ret > 0)
1963 ret = 0;
1964 }
1965 #ifdef LIBXML_ICONV_ENABLED
1966 else if (handler->iconv_in != NULL) {
1967 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1968 }
1969 #endif /* LIBXML_ICONV_ENABLED */
1970 #ifdef LIBXML_ICU_ENABLED
1971 else if (handler->uconv_in != NULL) {
1972 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1973 flush);
1974 }
1975 #endif /* LIBXML_ICU_ENABLED */
1976 else {
1977 *outlen = 0;
1978 *inlen = 0;
1979 ret = -2;
1980 }
1981
1982 return(ret);
1983 }
1984
1985 /**
1986 * xmlEncOutputChunk:
1987 * @handler: encoding handler
1988 * @out: a pointer to an array of bytes to store the result
1989 * @outlen: the length of @out
1990 * @in: a pointer to an array of input bytes
1991 * @inlen: the length of @in
1992 *
1993 * Returns 0 if success, or
1994 * -1 by lack of space, or
1995 * -2 if the transcoding fails (for *in is not valid utf8 string or
1996 * the result of transformation can't fit into the encoding we want), or
1997 * -3 if there the last byte can't form a single output char.
1998 * -4 if no output function was found.
1999 *
2000 * The value of @inlen after return is the number of octets consumed
2001 * as the return value is 0, else unpredictable.
2002 * The value of @outlen after return is the number of octets produced.
2003 */
2004 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2005 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2006 int *outlen, const unsigned char *in, int *inlen) {
2007 int ret;
2008
2009 if (handler->output != NULL) {
2010 ret = handler->output(out, outlen, in, inlen);
2011 if (ret > 0)
2012 ret = 0;
2013 }
2014 #ifdef LIBXML_ICONV_ENABLED
2015 else if (handler->iconv_out != NULL) {
2016 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2017 }
2018 #endif /* LIBXML_ICONV_ENABLED */
2019 #ifdef LIBXML_ICU_ENABLED
2020 else if (handler->uconv_out != NULL) {
2021 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2022 1);
2023 }
2024 #endif /* LIBXML_ICU_ENABLED */
2025 else {
2026 *outlen = 0;
2027 *inlen = 0;
2028 ret = -4;
2029 }
2030
2031 return(ret);
2032 }
2033
2034 /**
2035 * xmlCharEncFirstLineInt:
2036 * @handler: char encoding transformation data structure
2037 * @out: an xmlBuffer for the output.
2038 * @in: an xmlBuffer for the input
2039 * @len: number of bytes to convert for the first line, or -1
2040 *
2041 * Front-end for the encoding handler input function, but handle only
2042 * the very first line, i.e. limit itself to 45 chars.
2043 *
2044 * Returns the number of byte written if success, or
2045 * -1 general error
2046 * -2 if the transcoding fails (for *in is not valid utf8 string or
2047 * the result of transformation can't fit into the encoding we want), or
2048 */
2049 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2050 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2051 xmlBufferPtr in, int len) {
2052 int ret;
2053 int written;
2054 int toconv;
2055
2056 if (handler == NULL) return(-1);
2057 if (out == NULL) return(-1);
2058 if (in == NULL) return(-1);
2059
2060 /* calculate space available */
2061 written = out->size - out->use - 1; /* count '\0' */
2062 toconv = in->use;
2063 /*
2064 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2065 * 45 chars should be sufficient to reach the end of the encoding
2066 * declaration without going too far inside the document content.
2067 * on UTF-16 this means 90bytes, on UCS4 this means 180
2068 * The actual value depending on guessed encoding is passed as @len
2069 * if provided
2070 */
2071 if (len >= 0) {
2072 if (toconv > len)
2073 toconv = len;
2074 } else {
2075 if (toconv > 180)
2076 toconv = 180;
2077 }
2078 if (toconv * 2 >= written) {
2079 xmlBufferGrow(out, toconv * 2);
2080 written = out->size - out->use - 1;
2081 }
2082
2083 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2084 in->content, &toconv, 0);
2085 xmlBufferShrink(in, toconv);
2086 out->use += written;
2087 out->content[out->use] = 0;
2088 if (ret == -1) ret = -3;
2089
2090 #ifdef DEBUG_ENCODING
2091 switch (ret) {
2092 case 0:
2093 xmlGenericError(xmlGenericErrorContext,
2094 "converted %d bytes to %d bytes of input\n",
2095 toconv, written);
2096 break;
2097 case -1:
2098 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2099 toconv, written, in->use);
2100 break;
2101 case -2:
2102 xmlGenericError(xmlGenericErrorContext,
2103 "input conversion failed due to input error\n");
2104 break;
2105 case -3:
2106 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2107 toconv, written, in->use);
2108 break;
2109 default:
2110 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2111 }
2112 #endif /* DEBUG_ENCODING */
2113 /*
2114 * Ignore when input buffer is not on a boundary
2115 */
2116 if (ret == -3) ret = 0;
2117 if (ret == -1) ret = 0;
2118 return(written ? written : ret);
2119 }
2120
2121 /**
2122 * xmlCharEncFirstLine:
2123 * @handler: char encoding transformation data structure
2124 * @out: an xmlBuffer for the output.
2125 * @in: an xmlBuffer for the input
2126 *
2127 * Front-end for the encoding handler input function, but handle only
2128 * the very first line, i.e. limit itself to 45 chars.
2129 *
2130 * Returns the number of byte written if success, or
2131 * -1 general error
2132 * -2 if the transcoding fails (for *in is not valid utf8 string or
2133 * the result of transformation can't fit into the encoding we want), or
2134 */
2135 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2136 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2137 xmlBufferPtr in) {
2138 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2139 }
2140
2141 /**
2142 * xmlCharEncFirstLineInput:
2143 * @input: a parser input buffer
2144 * @len: number of bytes to convert for the first line, or -1
2145 *
2146 * Front-end for the encoding handler input function, but handle only
2147 * the very first line. Point is that this is based on autodetection
2148 * of the encoding and once that first line is converted we may find
2149 * out that a different decoder is needed to process the input.
2150 *
2151 * Returns the number of byte written if success, or
2152 * -1 general error
2153 * -2 if the transcoding fails (for *in is not valid utf8 string or
2154 * the result of transformation can't fit into the encoding we want), or
2155 */
2156 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2157 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2158 {
2159 int ret;
2160 size_t written;
2161 size_t toconv;
2162 int c_in;
2163 int c_out;
2164 xmlBufPtr in;
2165 xmlBufPtr out;
2166
2167 if ((input == NULL) || (input->encoder == NULL) ||
2168 (input->buffer == NULL) || (input->raw == NULL))
2169 return (-1);
2170 out = input->buffer;
2171 in = input->raw;
2172
2173 toconv = xmlBufUse(in);
2174 if (toconv == 0)
2175 return (0);
2176 written = xmlBufAvail(out) - 1; /* count '\0' */
2177 /*
2178 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2179 * 45 chars should be sufficient to reach the end of the encoding
2180 * declaration without going too far inside the document content.
2181 * on UTF-16 this means 90bytes, on UCS4 this means 180
2182 * The actual value depending on guessed encoding is passed as @len
2183 * if provided
2184 */
2185 if (len >= 0) {
2186 if (toconv > (unsigned int) len)
2187 toconv = len;
2188 } else {
2189 if (toconv > 180)
2190 toconv = 180;
2191 }
2192 if (toconv * 2 >= written) {
2193 xmlBufGrow(out, toconv * 2);
2194 written = xmlBufAvail(out) - 1;
2195 }
2196 if (written > 360)
2197 written = 360;
2198
2199 c_in = toconv;
2200 c_out = written;
2201 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2202 xmlBufContent(in), &c_in, 0);
2203 xmlBufShrink(in, c_in);
2204 xmlBufAddLen(out, c_out);
2205 if (ret == -1)
2206 ret = -3;
2207
2208 switch (ret) {
2209 case 0:
2210 #ifdef DEBUG_ENCODING
2211 xmlGenericError(xmlGenericErrorContext,
2212 "converted %d bytes to %d bytes of input\n",
2213 c_in, c_out);
2214 #endif
2215 break;
2216 case -1:
2217 #ifdef DEBUG_ENCODING
2218 xmlGenericError(xmlGenericErrorContext,
2219 "converted %d bytes to %d bytes of input, %d left\n",
2220 c_in, c_out, (int)xmlBufUse(in));
2221 #endif
2222 break;
2223 case -3:
2224 #ifdef DEBUG_ENCODING
2225 xmlGenericError(xmlGenericErrorContext,
2226 "converted %d bytes to %d bytes of input, %d left\n",
2227 c_in, c_out, (int)xmlBufUse(in));
2228 #endif
2229 break;
2230 case -2: {
2231 char buf[50];
2232 const xmlChar *content = xmlBufContent(in);
2233
2234 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2235 content[0], content[1],
2236 content[2], content[3]);
2237 buf[49] = 0;
2238 xmlEncodingErr(XML_I18N_CONV_FAILED,
2239 "input conversion failed due to input error, bytes %s\n",
2240 buf);
2241 }
2242 }
2243 /*
2244 * Ignore when input buffer is not on a boundary
2245 */
2246 if (ret == -3) ret = 0;
2247 if (ret == -1) ret = 0;
2248 return(c_out ? c_out : ret);
2249 }
2250
2251 /**
2252 * xmlCharEncInput:
2253 * @input: a parser input buffer
2254 * @flush: try to flush all the raw buffer
2255 *
2256 * Generic front-end for the encoding handler on parser input
2257 *
2258 * Returns the number of byte written if success, or
2259 * -1 general error
2260 * -2 if the transcoding fails (for *in is not valid utf8 string or
2261 * the result of transformation can't fit into the encoding we want), or
2262 */
2263 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2264 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2265 {
2266 int ret;
2267 size_t written;
2268 size_t toconv;
2269 int c_in;
2270 int c_out;
2271 xmlBufPtr in;
2272 xmlBufPtr out;
2273
2274 if ((input == NULL) || (input->encoder == NULL) ||
2275 (input->buffer == NULL) || (input->raw == NULL))
2276 return (-1);
2277 out = input->buffer;
2278 in = input->raw;
2279
2280 toconv = xmlBufUse(in);
2281 if (toconv == 0)
2282 return (0);
2283 if ((toconv > 64 * 1024) && (flush == 0))
2284 toconv = 64 * 1024;
2285 written = xmlBufAvail(out);
2286 if (written > 0)
2287 written--; /* count '\0' */
2288 if (toconv * 2 >= written) {
2289 xmlBufGrow(out, toconv * 2);
2290 written = xmlBufAvail(out);
2291 if (written > 0)
2292 written--; /* count '\0' */
2293 }
2294 if ((written > 128 * 1024) && (flush == 0))
2295 written = 128 * 1024;
2296
2297 c_in = toconv;
2298 c_out = written;
2299 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2300 xmlBufContent(in), &c_in, flush);
2301 xmlBufShrink(in, c_in);
2302 xmlBufAddLen(out, c_out);
2303 if (ret == -1)
2304 ret = -3;
2305
2306 switch (ret) {
2307 case 0:
2308 #ifdef DEBUG_ENCODING
2309 xmlGenericError(xmlGenericErrorContext,
2310 "converted %d bytes to %d bytes of input\n",
2311 c_in, c_out);
2312 #endif
2313 break;
2314 case -1:
2315 #ifdef DEBUG_ENCODING
2316 xmlGenericError(xmlGenericErrorContext,
2317 "converted %d bytes to %d bytes of input, %d left\n",
2318 c_in, c_out, (int)xmlBufUse(in));
2319 #endif
2320 break;
2321 case -3:
2322 #ifdef DEBUG_ENCODING
2323 xmlGenericError(xmlGenericErrorContext,
2324 "converted %d bytes to %d bytes of input, %d left\n",
2325 c_in, c_out, (int)xmlBufUse(in));
2326 #endif
2327 break;
2328 case -2: {
2329 char buf[50];
2330 const xmlChar *content = xmlBufContent(in);
2331
2332 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2333 content[0], content[1],
2334 content[2], content[3]);
2335 buf[49] = 0;
2336 xmlEncodingErr(XML_I18N_CONV_FAILED,
2337 "input conversion failed due to input error, bytes %s\n",
2338 buf);
2339 }
2340 }
2341 /*
2342 * Ignore when input buffer is not on a boundary
2343 */
2344 if (ret == -3)
2345 ret = 0;
2346 return (c_out? c_out : ret);
2347 }
2348
2349 /**
2350 * xmlCharEncInFunc:
2351 * @handler: char encoding transformation data structure
2352 * @out: an xmlBuffer for the output.
2353 * @in: an xmlBuffer for the input
2354 *
2355 * Generic front-end for the encoding handler input function
2356 *
2357 * Returns the number of byte written if success, or
2358 * -1 general error
2359 * -2 if the transcoding fails (for *in is not valid utf8 string or
2360 * the result of transformation can't fit into the encoding we want), or
2361 */
2362 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2363 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2364 xmlBufferPtr in)
2365 {
2366 int ret;
2367 int written;
2368 int toconv;
2369
2370 if (handler == NULL)
2371 return (-1);
2372 if (out == NULL)
2373 return (-1);
2374 if (in == NULL)
2375 return (-1);
2376
2377 toconv = in->use;
2378 if (toconv == 0)
2379 return (0);
2380 written = out->size - out->use -1; /* count '\0' */
2381 if (toconv * 2 >= written) {
2382 xmlBufferGrow(out, out->size + toconv * 2);
2383 written = out->size - out->use - 1;
2384 }
2385 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2386 in->content, &toconv, 1);
2387 xmlBufferShrink(in, toconv);
2388 out->use += written;
2389 out->content[out->use] = 0;
2390 if (ret == -1)
2391 ret = -3;
2392
2393 switch (ret) {
2394 case 0:
2395 #ifdef DEBUG_ENCODING
2396 xmlGenericError(xmlGenericErrorContext,
2397 "converted %d bytes to %d bytes of input\n",
2398 toconv, written);
2399 #endif
2400 break;
2401 case -1:
2402 #ifdef DEBUG_ENCODING
2403 xmlGenericError(xmlGenericErrorContext,
2404 "converted %d bytes to %d bytes of input, %d left\n",
2405 toconv, written, in->use);
2406 #endif
2407 break;
2408 case -3:
2409 #ifdef DEBUG_ENCODING
2410 xmlGenericError(xmlGenericErrorContext,
2411 "converted %d bytes to %d bytes of input, %d left\n",
2412 toconv, written, in->use);
2413 #endif
2414 break;
2415 case -2: {
2416 char buf[50];
2417
2418 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2419 in->content[0], in->content[1],
2420 in->content[2], in->content[3]);
2421 buf[49] = 0;
2422 xmlEncodingErr(XML_I18N_CONV_FAILED,
2423 "input conversion failed due to input error, bytes %s\n",
2424 buf);
2425 }
2426 }
2427 /*
2428 * Ignore when input buffer is not on a boundary
2429 */
2430 if (ret == -3)
2431 ret = 0;
2432 return (written? written : ret);
2433 }
2434
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Data is taken from output->buffer, converted with output->encoder and
 * appended to output->conv. One conversion pass handles at most
 * 64 * 1024 input bytes; the function loops while the encoder makes
 * progress but runs out of space, and after each inserted CharRef.
 *
 * Returns the number of byte written if success, or
 *     -1 general error
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);
    if (written > 0)
        written--; /* count '\0' */

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        xmlBufGrow(out, toconv * 4);
        /*
         * The grow result is not checked, so guard the decrement like
         * above: on a size_t, 0 - 1 would wrap to a huge value.
         */
        written = xmlBufAvail(out);
        if (written > 0)
            written--; /* count '\0' */
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    /* on return c_in is the bytes consumed, c_out the bytes produced */
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -1:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "output conversion failed by lack of space\n");
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = (int) xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;
            size_t avail;

            /* decode the character the encoder could not represent */
            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            /* guarded decrement, see the conversion step above */
            avail = xmlBufAvail(out);
            if (avail > 0)
                avail--; /* count '\0' */
            c_out = avail;
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         content[0], content[1],
                         content[2], content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
                    content[0] = ' ';
                break;
            }

            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
    }
    return(writtentot ? writtentot : ret);
}
#endif
2607
/**
 * xmlCharEncOutFunc:
 * @handler:  char encoding transformation data structure
 * @out:  an xmlBuffer for the output.
 * @in:  an xmlBuffer for the input
 *
 * Generic front-end for the encoding handler output function
 * a first call with @in == NULL has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * The initialization call (@in == NULL) and a call with an empty @in
 * both return 0.
 *
 * Returns the number of byte written if success, or
 *     -1 general error
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 */
int
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                  xmlBufferPtr in) {
    int ret;
    int written;
    int writtentot = 0;
    int toconv;

    if (handler == NULL) return(-1);
    if (out == NULL) return(-1);

    /*
     * Re-entered after a partial conversion that ran out of space and
     * after each character replaced by a CharRef.
     */
retry:

    written = out->size - out->use;

    if (written > 0)
        written--; /* keep room for the trailing '\0' */

    /*
     * First specific handling of in = NULL, i.e. the initialization call
     */
    if (in == NULL) {
        toconv = 0;
        /* TODO: Check return value. */
        xmlEncOutputChunk(handler, &out->content[out->use], &written,
                          NULL, &toconv);
        out->use += written;
        out->content[out->use] = 0;
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(0);
    }

    /*
     * Conversion itself.
     */
    toconv = in->use;
    if (toconv == 0)
        return(0);
    /* make sure @out has room for 4 output bytes per input byte */
    if (toconv * 4 >= written) {
        xmlBufferGrow(out, toconv * 4);
        written = out->size - out->use - 1;
    }
    /* on return toconv is the bytes consumed, written the bytes produced */
    ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
                            in->content, &toconv);
    xmlBufferShrink(in, toconv);
    out->use += written;
    writtentot += written;
    out->content[out->use] = 0;
    if (ret == -1) {
        if (written > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        /* no progress at all: handled like -3 below */
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    toconv, written);
#endif
            break;
        case -1:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "output conversion failed by lack of space\n");
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    toconv, written, in->use);
#endif
            break;
        case -4:
            /* the handler has no output converter; report a general error */
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = in->use;
            const xmlChar *utf = (const xmlChar *) in->content;
            int cur, charrefLen;

            /* decode the character at the start of the remaining input */
            cur = xmlGetUTF8Char(utf, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    in->content[0], in->content[1],
                    in->content[2], in->content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufferShrink(in, len);
            xmlBufferGrow(out, charrefLen * 4);
            written = out->size - out->use - 1;
            toconv = charrefLen;
            ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
                                    charref, &toconv);

            /* the CharRef itself must be converted completely */
            if ((ret < 0) || (toconv != charrefLen)) {
                char buf[50];

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         in->content[0], in->content[1],
                         in->content[2], in->content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
                    in->content[0] = ' ';
                break;
            }

            out->use += written;
            writtentot += written;
            out->content[out->use] = 0;
            goto retry;
        }
    }
    return(writtentot ? writtentot : ret);
}
2768
2769 /**
2770 * xmlCharEncCloseFunc:
2771 * @handler: char encoding transformation data structure
2772 *
2773 * Generic front-end for encoding handler close function
2774 *
2775 * Returns 0 if success, or -1 in case of error
2776 */
2777 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2778 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2779 int ret = 0;
2780 int tofree = 0;
2781 int i, handler_in_list = 0;
2782
2783 if (handler == NULL) return(-1);
2784 if (handler->name == NULL) return(-1);
2785 if (handlers != NULL) {
2786 for (i = 0;i < nbCharEncodingHandler; i++) {
2787 if (handler == handlers[i]) {
2788 handler_in_list = 1;
2789 break;
2790 }
2791 }
2792 }
2793 #ifdef LIBXML_ICONV_ENABLED
2794 /*
2795 * Iconv handlers can be used only once, free the whole block.
2796 * and the associated icon resources.
2797 */
2798 if ((handler_in_list == 0) &&
2799 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2800 tofree = 1;
2801 if (handler->iconv_out != NULL) {
2802 if (iconv_close(handler->iconv_out))
2803 ret = -1;
2804 handler->iconv_out = NULL;
2805 }
2806 if (handler->iconv_in != NULL) {
2807 if (iconv_close(handler->iconv_in))
2808 ret = -1;
2809 handler->iconv_in = NULL;
2810 }
2811 }
2812 #endif /* LIBXML_ICONV_ENABLED */
2813 #ifdef LIBXML_ICU_ENABLED
2814 if ((handler_in_list == 0) &&
2815 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2816 tofree = 1;
2817 if (handler->uconv_out != NULL) {
2818 closeIcuConverter(handler->uconv_out);
2819 handler->uconv_out = NULL;
2820 }
2821 if (handler->uconv_in != NULL) {
2822 closeIcuConverter(handler->uconv_in);
2823 handler->uconv_in = NULL;
2824 }
2825 }
2826 #endif
2827 if (tofree) {
2828 /* free up only dynamic handlers iconv/uconv */
2829 if (handler->name != NULL)
2830 xmlFree(handler->name);
2831 handler->name = NULL;
2832 xmlFree(handler);
2833 }
2834 #ifdef DEBUG_ENCODING
2835 if (ret)
2836 xmlGenericError(xmlGenericErrorContext,
2837 "failed to close the encoding handler\n");
2838 else
2839 xmlGenericError(xmlGenericErrorContext,
2840 "closed the encoding handler\n");
2841 #endif
2842
2843 return(ret);
2844 }
2845
2846 /**
2847 * xmlByteConsumed:
2848 * @ctxt: an XML parser context
2849 *
2850 * This function provides the current index of the parser relative
2851 * to the start of the current entity. This function is computed in
2852 * bytes from the beginning starting at zero and finishing at the
2853 * size in byte of the file if parsing a file. The function is
2854 * of constant cost if the input is UTF-8 but can be costly if run
2855 * on non-UTF-8 input.
2856 *
2857 * Returns the index in bytes from the beginning of the entity or -1
2858 * in case the index could not be computed.
2859 */
2860 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2861 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2862 xmlParserInputPtr in;
2863
2864 if (ctxt == NULL) return(-1);
2865 in = ctxt->input;
2866 if (in == NULL) return(-1);
2867 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2868 unsigned int unused = 0;
2869 xmlCharEncodingHandler * handler = in->buf->encoder;
2870 /*
2871 * Encoding conversion, compute the number of unused original
2872 * bytes from the input not consumed and subtract that from
2873 * the raw consumed value, this is not a cheap operation
2874 */
2875 if (in->end - in->cur > 0) {
2876 unsigned char convbuf[32000];
2877 const unsigned char *cur = (const unsigned char *)in->cur;
2878 int toconv = in->end - in->cur, written = 32000;
2879
2880 int ret;
2881
2882 do {
2883 toconv = in->end - cur;
2884 written = 32000;
2885 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2886 cur, &toconv);
2887 if (ret < 0) {
2888 if (written > 0)
2889 ret = -2;
2890 else
2891 return(-1);
2892 }
2893 unused += written;
2894 cur += toconv;
2895 } while (ret == -2);
2896 }
2897 if (in->buf->rawconsumed < unused)
2898 return(-1);
2899 return(in->buf->rawconsumed - unused);
2900 }
2901 return(in->consumed + (in->cur - in->base));
2902 }
2903
2904 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2905 #ifdef LIBXML_ISO8859X_ENABLED
2906
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Each converted character produces exactly one
 * output byte and no more than @outlen bytes are ever written to @out.
 * A NULL @in is an initialization call which does nothing.
 *
 * Returns the number of bytes written (0 or more) if success,
 *     -1 if an argument is NULL or if @out is full while input remains,
 *     -2 if the transcoding fails (invalid UTF-8, or a character which
 *        is not part of the target character set),
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 * always a whole number of characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* a negative capacity is treated as an empty output buffer */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /* output buffer full with input left: lack of space */
            ret = -1;
            goto done;
        }
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level selects a 64 entries block, second the byte */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if ((inend - in) < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* a complete character was converted */
        processed = in;
    }

done:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3025
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: for each byte 0x80..0xFF (indexed by byte - 0x80) the
 *                matching code point, or 0 if the byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, the other
 * ones are expanded to 2 or 3 bytes. Conversion stops early, without
 * error, when @out cannot hold the next character.
 *
 * Returns the number of bytes written (0 or more) if success, or -1 if
 *     an argument is NULL or an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: needs at least 3 free bytes, the longest sequence
     * emitted below. Written as a distance so that no pointer before
     * the start of @out is formed for tiny buffers.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >> 6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >> 12) & 0x0F) | 0xE0;
                *out++ = ((c >> 6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than @out can still take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bound check first: *in must not be read at or past instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* fewer than 3 bytes left: up to two ASCII bytes may still fit */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3091
3092
3093 /************************************************************************
3094 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3095 ************************************************************************/
3096
/*
 * ISO-8859-2 lookup table with 128 entries of 16 bits each.
 * NOTE(review): presumably passed as @unicodetable to ISO8859xToUTF8(),
 * which would make entry i the code point of byte 0x80 + i and 0 the
 * marker for an undefined byte -- the call site is outside this part of
 * the file, confirm there.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3115
/*
 * xmltranscodetable_ISO8859_2: 48 + 6 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by six
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-2 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3116 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3117 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3118 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3125 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3126 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3127 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3129 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3137 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3138 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3139 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3140 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3141 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3142 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3143 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3144 };
3145
/*
 * xmlunicodetable_ISO8859_3: 128 16-bit values for ISO-8859-3.
 * Entries 0..31 are 0x0080..0x009f in order; the remaining 96 are
 * charset-specific, and several of them are 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point, with 0x0000 marking bytes that have no assigned
 * character; the consumer is not in this part of the file -- confirm.
 */
3146 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3147 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3148 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3149 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3150 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3151 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3152 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3153 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3154 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3155 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3156 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3157 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3158 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3159 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3160 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3161 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3162 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3163 };
3164
/*
 * xmltranscodetable_ISO8859_3: 48 + 7 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by seven
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-3 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3165 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3166 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3173 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3174 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3175 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3176 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3178 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3179 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3180 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3191 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3192 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3193 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3194 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3195 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3196 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3197 };
3198
/*
 * xmlunicodetable_ISO8859_4: 128 16-bit values for ISO-8859-4.
 * Entries 0..31 are 0x0080..0x009f in order; the remaining 96 are
 * charset-specific.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of each high-half byte; the consumer is not in this part
 * of the file -- confirm there.
 */
3199 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3200 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3201 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3202 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3203 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3204 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3205 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3206 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3207 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3208 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3209 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3210 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3211 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3212 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3213 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3214 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3215 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3216 };
3217
/*
 * xmltranscodetable_ISO8859_4: 48 + 6 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by six
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-4 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3218 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3219 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3227 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3228 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3229 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3230 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3231 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3233 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3237 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3243 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3244 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3245 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3246 };
3247
/*
 * xmlunicodetable_ISO8859_5: 128 16-bit values for ISO-8859-5.
 * Entries 0..31 are 0x0080..0x009f in order; most of the remaining 96
 * lie in the 0x04xx range.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of each high-half byte; the consumer is not in this part
 * of the file -- confirm there.
 */
3248 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3249 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3250 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3251 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3252 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3253 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3254 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3255 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3256 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3257 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3258 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3259 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3260 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3261 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3262 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3263 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3264 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3265 };
3266
/*
 * xmltranscodetable_ISO8859_5: 48 + 6 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by six
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-5 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3267 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3268 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3276 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3277 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3278 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3280 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3281 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3282 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3283 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3284 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3285 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 };
3296
/*
 * xmlunicodetable_ISO8859_6: 128 16-bit values for ISO-8859-6.
 * Entries 0..31 are 0x0080..0x009f in order; of the remaining 96 many
 * are 0x0000 and most of the others lie in the 0x06xx range.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point, with 0x0000 marking bytes that have no assigned
 * character; the consumer is not in this part of the file -- confirm.
 */
3297 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3298 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3299 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3300 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3301 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3302 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3303 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3304 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3305 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3306 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3307 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3308 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3309 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3310 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3311 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3312 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3313 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3314 };
3315
/*
 * xmltranscodetable_ISO8859_6: 48 + 5 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by five
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-6 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3316 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3317 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3325 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3326 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3327 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3334 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3335 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3336 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3337 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 };
3341
/*
 * xmlunicodetable_ISO8859_7: 128 16-bit values for ISO-8859-7.
 * Entries 0..31 are 0x0080..0x009f in order; of the remaining 96 a few
 * are 0x0000 and most of the others lie in the 0x03xx range.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point, with 0x0000 marking bytes that have no assigned
 * character; the consumer is not in this part of the file -- confirm.
 */
3342 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3343 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3344 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3345 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3346 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3347 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3348 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3349 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3350 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3351 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3352 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3353 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3354 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3355 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3356 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3357 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3358 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3359 };
3360
/*
 * xmltranscodetable_ISO8859_7: 48 + 7 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by seven
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-7 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3361 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3362 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3370 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3371 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3372 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3373 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3386 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3387 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3388 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3389 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3390 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 };
3394
/*
 * xmlunicodetable_ISO8859_8: 128 16-bit values for ISO-8859-8.
 * Entries 0..31 are 0x0080..0x009f in order; of the remaining 96 many
 * are 0x0000 and a contiguous run lies in the 0x05xx range.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point, with 0x0000 marking bytes that have no assigned
 * character; the consumer is not in this part of the file -- confirm.
 */
3395 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3396 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3397 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3398 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3399 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3400 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3401 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3402 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3403 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3404 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3405 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3406 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3407 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3408 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3409 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3410 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3411 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3412 };
3413
/*
 * xmltranscodetable_ISO8859_8: 48 + 7 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by seven
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-8 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3414 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3415 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3423 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3424 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3425 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3426 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3439 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3444 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3445 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 };
3447
/*
 * xmlunicodetable_ISO8859_9: 128 16-bit values for ISO-8859-9.
 * Entries 0..31 are 0x0080..0x009f in order; all but six of the
 * remaining 96 equal their own position plus 0x80.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of each high-half byte; the consumer is not in this part
 * of the file -- confirm there.
 */
3448 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3449 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3450 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3451 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3452 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3453 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3454 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3455 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3456 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3457 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3458 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3459 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3460 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3461 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3462 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3463 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3464 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3465 };
3466
/*
 * xmltranscodetable_ISO8859_9: 48 + 5 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by five
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-9 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3467 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3468 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3476 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3477 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3478 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3479 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3480 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3481 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3482 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 };
3492
/*
 * xmlunicodetable_ISO8859_10: 128 16-bit values for ISO-8859-10.
 * Entries 0..31 are 0x0080..0x009f in order; the remaining 96 are
 * charset-specific.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of each high-half byte; the consumer is not in this part
 * of the file -- confirm there.
 */
3493 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3494 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3495 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3496 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3497 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3498 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3499 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3500 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3501 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3502 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3503 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3504 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3505 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3506 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3507 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3508 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3509 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3510 };
3511
/*
 * xmltranscodetable_ISO8859_10: 48 + 7 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by seven
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-10 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3512 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3513 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3521 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3522 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3523 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3524 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3525 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3526 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3527 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3531 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3532 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3541 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3542 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3543 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3544 };
3545
/*
 * xmlunicodetable_ISO8859_11: 128 16-bit values for ISO-8859-11.
 * Entries 0..31 are 0x0080..0x009f in order; of the remaining 96 eight
 * are 0x0000 and all but one of the others lie in the 0x0exx range.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point, with 0x0000 marking bytes that have no assigned
 * character; the consumer is not in this part of the file -- confirm.
 */
3546 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3547 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3548 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3549 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3550 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3551 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3552 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3553 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3554 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3555 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3556 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3557 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3558 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3559 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3560 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3561 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3562 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3563 };
3564
/*
 * xmltranscodetable_ISO8859_11: 48 + 6 * 64 bytes, given as a string
 * literal that fills the array exactly (so no terminating NUL is stored).
 * NOTE(review): the layout looks like a 48-byte index followed by six
 * 64-byte pages, the first page being all zero; presumably used for the
 * UTF-8 to ISO-8859-11 direction with 0x00 meaning "no mapping". The
 * lookup code is outside this chunk -- confirm before editing.
 */
3565 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3566 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3574 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3575 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3581 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3582 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3583 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3584 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3585 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3586 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3587 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3590 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3591 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 };
3594
/*
 * xmlunicodetable_ISO8859_13: 128 16-bit values for ISO-8859-13.
 * Entries 0..31 are 0x0080..0x009f in order; the remaining 96 are
 * charset-specific.
 * NOTE(review): presumably indexed by (byte - 0x80) to give the Unicode
 * code point of each high-half byte; the consumer is not in this part
 * of the file -- confirm there.
 */
3595 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3596 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3597 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3598 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3599 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3600 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3601 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3602 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3603 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3604 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3605 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3606 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3607 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3608 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3609 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3610 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3611 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3612 };
3613
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table that UTF8ToISO8859_13() hands to UTF8ToISO8859x(). The
 * declared size is 48 + 7 * 64 = 496 bytes and the 31 literal rows of
 * 16 bytes below fill it exactly, so no terminating NUL is stored.
 * NOTE(review): the lookup scheme lives in UTF8ToISO8859x(), outside this
 * chunk; the size expression suggests a 48-byte index followed by seven
 * 64-byte blocks, with 0x00 presumably meaning "no mapping" -- confirm.
 */
3614 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3615 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3623 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3624 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3625 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3626 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3632 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3635 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3636 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3637 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3638 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3639 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3640 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3641 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3642 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3643 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3644 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3645 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3646 };
3647
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values that ISO8859_14ToUTF8() hands to
 * ISO8859xToUTF8(). The first 32 entries are the identity run
 * 0x0080..0x009f.
 * NOTE(review): ISO8859xToUTF8() is outside this chunk; presumably entry i
 * is the Unicode code point for ISO-8859-14 byte 0x80 + i -- confirm there.
 */
3648 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3649 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3650 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3651 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3652 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3653 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3654 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3655 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3656 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3657 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3658 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3659 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3660 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3661 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3662 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3663 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3664 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3665 };
3666
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table that UTF8ToISO8859_14() hands to UTF8ToISO8859x(). The
 * declared size is 48 + 10 * 64 = 688 bytes and the 43 literal rows of
 * 16 bytes below fill it exactly, so no terminating NUL is stored.
 * NOTE(review): the lookup scheme lives in UTF8ToISO8859x(), outside this
 * chunk; the size expression suggests a 48-byte index followed by ten
 * 64-byte blocks, with 0x00 presumably meaning "no mapping" -- confirm.
 */
3667 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3668 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3676 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3677 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3678 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3683 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3684 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3685 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3688 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3703 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3704 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3705 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3706 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3708 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3709 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3710 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3711 };
3712
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values that ISO8859_15ToUTF8() hands to
 * ISO8859xToUTF8(). Most entries equal 0x0080 + index; the exceptions
 * visible below are 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152,
 * 0x0153 and 0x0178.
 * NOTE(review): ISO8859xToUTF8() is outside this chunk; presumably entry i
 * is the Unicode code point for ISO-8859-15 byte 0x80 + i -- confirm there.
 */
3713 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3714 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3715 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3716 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3717 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3718 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3719 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3720 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3721 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3722 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3723 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3724 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3725 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3726 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3727 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3728 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3729 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3730 };
3731
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table that UTF8ToISO8859_15() hands to UTF8ToISO8859x(). The
 * declared size is 48 + 6 * 64 = 432 bytes and the 27 literal rows of
 * 16 bytes below fill it exactly, so no terminating NUL is stored.
 * NOTE(review): the lookup scheme lives in UTF8ToISO8859x(), outside this
 * chunk; the size expression suggests a 48-byte index followed by six
 * 64-byte blocks, with 0x00 presumably meaning "no mapping" -- confirm.
 */
3732 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3733 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3741 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3742 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3743 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3744 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3756 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3758 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3760 };
3761
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values that ISO8859_16ToUTF8() hands to
 * ISO8859xToUTF8(). The first 32 entries are the identity run
 * 0x0080..0x009f.
 * NOTE(review): ISO8859xToUTF8() is outside this chunk; presumably entry i
 * is the Unicode code point for ISO-8859-16 byte 0x80 + i -- confirm there.
 */
3762 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3763 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3764 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3765 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3766 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3767 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3768 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3769 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3770 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3771 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3772 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3773 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3774 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3775 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3776 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3777 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3778 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3779 };
3780
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table that UTF8ToISO8859_16() hands to UTF8ToISO8859x(). The
 * declared size is 48 + 9 * 64 = 624 bytes and the 39 literal rows of
 * 16 bytes below fill it exactly, so no terminating NUL is stored.
 * NOTE(review): the lookup scheme lives in UTF8ToISO8859x(), outside this
 * chunk; the size expression suggests a 48-byte index followed by nine
 * 64-byte blocks, with 0x00 presumably meaning "no mapping" -- confirm.
 */
3781 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3782 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3790 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3791 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3792 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3793 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3794 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3795 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3799 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3801 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3808 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3811 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3814 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3815 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3818 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3819 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3820 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3821 };
3822
3823
3824 /*
3825 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3826 */
3827
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3828 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3829 const unsigned char* in, int *inlen) {
3830 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3831 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3832 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3833 const unsigned char* in, int *inlen) {
3834 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3835 }
3836
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3837 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3838 const unsigned char* in, int *inlen) {
3839 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3840 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3841 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3842 const unsigned char* in, int *inlen) {
3843 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3844 }
3845
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3846 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3847 const unsigned char* in, int *inlen) {
3848 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3849 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3850 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3851 const unsigned char* in, int *inlen) {
3852 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3853 }
3854
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3855 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3856 const unsigned char* in, int *inlen) {
3857 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3858 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3859 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3860 const unsigned char* in, int *inlen) {
3861 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3862 }
3863
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3864 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3865 const unsigned char* in, int *inlen) {
3866 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3867 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3868 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3869 const unsigned char* in, int *inlen) {
3870 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3871 }
3872
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3873 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3874 const unsigned char* in, int *inlen) {
3875 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3876 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3877 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3878 const unsigned char* in, int *inlen) {
3879 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3880 }
3881
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3882 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3883 const unsigned char* in, int *inlen) {
3884 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3885 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3886 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3887 const unsigned char* in, int *inlen) {
3888 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3889 }
3890
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3891 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3892 const unsigned char* in, int *inlen) {
3893 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3894 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3895 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3896 const unsigned char* in, int *inlen) {
3897 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3898 }
3899
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3900 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3901 const unsigned char* in, int *inlen) {
3902 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3903 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3904 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3905 const unsigned char* in, int *inlen) {
3906 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3907 }
3908
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3909 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3910 const unsigned char* in, int *inlen) {
3911 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3912 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3913 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3914 const unsigned char* in, int *inlen) {
3915 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3916 }
3917
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3918 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3919 const unsigned char* in, int *inlen) {
3920 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3921 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3922 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3923 const unsigned char* in, int *inlen) {
3924 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3925 }
3926
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3927 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3928 const unsigned char* in, int *inlen) {
3929 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3930 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3931 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3932 const unsigned char* in, int *inlen) {
3933 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3934 }
3935
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3936 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3937 const unsigned char* in, int *inlen) {
3938 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3939 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3940 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3941 const unsigned char* in, int *inlen) {
3942 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3943 }
3944
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3945 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3946 const unsigned char* in, int *inlen) {
3947 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3948 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3949 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3950 const unsigned char* in, int *inlen) {
3951 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3952 }
3953
3954 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3955 xmlRegisterCharEncodingHandlersISO8859x (void) {
3956 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3957 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3958 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3959 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3960 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3961 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3962 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3963 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3964 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3965 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3966 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3967 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3968 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3969 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3970 }
3971
3972 #endif
3973 #endif
3974
3975 #define bottom_encoding
3976 #include "elfgcchack.h"
3977