1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23 #define IN_LIBXML
24 #include "libxml.h"
25
26 #include <string.h>
27 #include <limits.h>
28
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47
48 #include "buf.h"
49 #include "enc.h"
50
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
57 const char *name;
58 const char *alias;
59 };
60
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
64
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74
/*
 * Host byte-order flag: non-zero means little-endian. It defaults to 1;
 * presumably it is re-detected during handler initialisation, which is not
 * visible in this section of the file.
 */
static int xmlLittleEndian = 1;
76
77 /**
78 * xmlEncodingErrMemory:
79 * @extra: extra informations
80 *
81 * Handle an out of memory condition
82 */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88
89 /**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->pivot_source = conv->pivot_buf;
114 conv->pivot_target = conv->pivot_buf;
115
116 conv->uconv = ucnv_open(name, &status);
117 if (U_FAILURE(status))
118 goto error;
119
120 status = U_ZERO_ERROR;
121 if (toUnicode) {
122 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123 NULL, NULL, NULL, &status);
124 }
125 else {
126 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127 NULL, NULL, NULL, &status);
128 }
129 if (U_FAILURE(status))
130 goto error;
131
132 status = U_ZERO_ERROR;
133 conv->utf8 = ucnv_open("UTF-8", &status);
134 if (U_SUCCESS(status))
135 return conv;
136
137 error:
138 if (conv->uconv)
139 ucnv_close(conv->uconv);
140 xmlFree(conv);
141 return NULL;
142 }
143
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147 if (conv != NULL) {
148 ucnv_close(conv->uconv);
149 ucnv_close(conv->utf8);
150 xmlFree(conv);
151 }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154
155 /************************************************************************
156 * *
157 * Conversions To/From UTF8 encoding *
158 * *
159 ************************************************************************/
160
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to @out, which makes it valid UTF-8.
 * Conversion only proceeds while more than 5 bytes of @out are still
 * free, so a few trailing bytes of the output buffer are never used.
 *
 * Returns the number of bytes written, or -1 if a byte above 0x7F was met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int room = *outlen;
    const int avail = *inlen;
    int n = 0;      /* bytes consumed, which is also bytes produced */

    while ((n < avail) && (n + 5 < room)) {
        if (in[n] >= 0x80) {
            /* not ASCII: report what was converted before this byte */
            *outlen = n;
            *inlen = n;
            return(-1);
        }
        out[n] = in[n];
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(*outlen);
}
205
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A NULL @in is the "initialization" call: nothing
 * is written and both lengths are set to 0.
 *
 * A sequence cut short by the end of @in is left unconsumed so that the
 * caller can retry once more input is available.
 *
 * Returns the number of bytes written, -1 if @out, @outlen or @inlen is
 *     NULL, or -2 if the input is not valid UTF-8 or holds a character
 *     that has no ASCII representation.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto invalid;
        }

        /* incomplete sequence at the end of the buffer: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation byte makes the sequence
             * malformed; report it instead of emitting a partial value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c now holds one complete code point */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto invalid;
        }
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as is, every other byte
 * becomes a two byte sequence. Conversion stops at the first character
 * that no longer fits in @out.
 *
 * Returns the number of bytes written, or -1 if one of the arguments
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    unsigned char *dst;
    unsigned char *dstend;
    unsigned char ch;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    for ( ; src < srcend; src++) {
        ch = *src;
        if (ch < 0x80) {
            /* plain ASCII needs a single byte */
            if (dstend - dst < 1)
                break;
            *dst++ = ch;
        } else {
            /* 0x80..0xFF expands to a two byte sequence */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
340
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars, or NULL
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit are copied
 * verbatim from @inb to @out. A NULL @inb is the "initialization" call and
 * only resets both lengths to 0.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 *     is NULL or if the number of bytes to copy would be negative.
 * The value of *inlenb and *outlen after a successful return is the
 * number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy whichever is smaller: what is available or what fits */
    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(*outlen);
}
382
383
384 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is the "initialization" call: nothing is
 * written and both lengths are set to 0. Conversion stops early, without
 * error, when @out is full or when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written, -2 if the input is malformed or
 *     holds a character above 0xFF, or -1 if @out, @outlen or @inlen
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;
    const unsigned char *done;      /* end of the last fully converted char */
    const unsigned char *srcend;
    unsigned char *dst;
    const unsigned char *dstend;
    unsigned int code, byte;
    int extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    done = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + (*outlen);

    while (cur < srcend) {
        byte = *cur++;
        if (byte < 0x80) {
            code = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (byte < 0xE0) {
            code = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            code = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            code = byte & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            goto invalid;
        }

        /* the rest of the sequence has not arrived yet */
        if (srcend - cur < extra)
            break;

        while (extra > 0) {
            byte = *cur++;
            if ((byte & 0xC0) != 0x80)
                goto invalid;
            code = (code << 6) | (byte & 0x3F);
            extra--;
        }

        /* code is now one complete code point */
        if (code > 0xFF) {
            /* no chance for this in IsoLat1 */
            goto invalid;
        }
        if (dst >= dstend)
            break;
        *dst++ = code;
        done = cur;
    }

    *outlen = dst - out;
    *inlen = done - in;
    return(*outlen);

invalid:
    *outlen = dst - out;
    *inlen = done - in;
    return(-2);
}
474 #endif /* LIBXML_OUTPUT_ENABLED */
475
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one byte at a time, so @inb needs
 * no particular alignment and the result does not depend on the byte order
 * of the host.
 *
 * Conversion stops early, without error, when the next character does not
 * fit in @out, when @inb ends with an odd byte, or when it ends in the
 * middle of a surrogate pair; what is left can be resubmitted later.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 *     followed by a low surrogate.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;      /* start of the current character */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    /* Only whole 16-bit units can be decoded: ignore a dangling odd byte. */
    inend = inb + (*inlenb / 2) * 2;

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /* the low surrogate has not arrived yet: wait for it */
            if (next >= inend)
                break;
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* c is one complete code point: work out its UTF-8 length */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /* never emit a truncated sequence */
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
563
564 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. A NULL @in is the
 * "initialization" call: nothing is written and both lengths are set to 0.
 *
 * The output is written one byte at a time, so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 * Conversion stops early, without error, when the next character does not
 * fit in @outb or when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the input is malformed or encodes a value of
 *     0x110000 or more.
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence at the end of the buffer: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is one complete code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
673
674 /**
675 * UTF8ToUTF16:
676 * @outb: a pointer to an array of bytes to store the result
677 * @outlen: the length of @outb
678 * @in: a pointer to an array of UTF-8 chars
679 * @inlen: the length of @in
680 *
681 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
682 * block of chars out.
683 *
684 * Returns the number of bytes written, or -1 if lack of space, or -2
685 * if the transcoding failed.
686 */
687 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)688 UTF8ToUTF16(unsigned char* outb, int *outlen,
689 const unsigned char* in, int *inlen)
690 {
691 if (in == NULL) {
692 /*
693 * initialization, add the Byte Order Mark for UTF-16LE
694 */
695 if (*outlen >= 2) {
696 outb[0] = 0xFF;
697 outb[1] = 0xFE;
698 *outlen = 2;
699 *inlen = 0;
700 #ifdef DEBUG_ENCODING
701 xmlGenericError(xmlGenericErrorContext,
702 "Added FFFE Byte Order Mark\n");
703 #endif
704 return(2);
705 }
706 *outlen = 0;
707 *inlen = 0;
708 return(0);
709 }
710 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
711 }
712 #endif /* LIBXML_OUTPUT_ENABLED */
713
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read one byte at a time, so @inb needs
 * no particular alignment and the result does not depend on the byte order
 * of the host.
 *
 * Conversion stops early, without error, when the next character does not
 * fit in @out, when @inb ends with an odd byte, or when it ends in the
 * middle of a surrogate pair; what is left can be resubmitted later.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 *     followed by a low surrogate.
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + *outlen;
    const unsigned char* in = inb;      /* start of the current character */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int need;

    /* Only whole 16-bit units can be decoded: ignore a dangling odd byte. */
    inend = inb + (*inlenb / 2) * 2;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /*
             * The low surrogate has not arrived yet. This is not an
             * error: leave the high surrogate unconsumed and wait.
             */
            if (next >= inend)
                break;
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* c is one complete code point: work out its UTF-8 length */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /* never emit a truncated sequence */
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
805
806 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. A NULL @in is the
 * "initialization" call: nothing is written and both lengths are set to 0.
 *
 * The output is written one byte at a time, so @outb needs no particular
 * alignment and the result does not depend on the byte order of the host.
 * Conversion stops early, without error, when the next character does not
 * fit in @outb or when the last sequence of @in is incomplete.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the input is malformed or encodes a value of
 *     0x110000 or more.
 * The value of *inlen after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units are ever written */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto invalid;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto invalid;
        }

        /* incomplete sequence at the end of the buffer: wait for more */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is one complete code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    /* report the byte count here too, matching the success path */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
912 #endif /* LIBXML_OUTPUT_ENABLED */
913
914 /************************************************************************
915 * *
916 * Generic encoding handling routines *
917 * *
918 ************************************************************************/
919
920 /**
921 * xmlDetectCharEncoding:
922 * @in: a pointer to the first bytes of the XML entity, must be at least
923 * 2 bytes long (at least 4 if encoding is UTF4 variant).
924 * @len: pointer to the length of the buffer
925 *
926 * Guess the encoding of the entity using the first bytes of the entity content
927 * according to the non-normative appendix F of the XML-1.0 recommendation.
928 *
929 * Returns one of the XML_CHAR_ENCODING_... values.
930 */
931 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)932 xmlDetectCharEncoding(const unsigned char* in, int len)
933 {
934 if (in == NULL)
935 return(XML_CHAR_ENCODING_NONE);
936 if (len >= 4) {
937 if ((in[0] == 0x00) && (in[1] == 0x00) &&
938 (in[2] == 0x00) && (in[3] == 0x3C))
939 return(XML_CHAR_ENCODING_UCS4BE);
940 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
941 (in[2] == 0x00) && (in[3] == 0x00))
942 return(XML_CHAR_ENCODING_UCS4LE);
943 if ((in[0] == 0x00) && (in[1] == 0x00) &&
944 (in[2] == 0x3C) && (in[3] == 0x00))
945 return(XML_CHAR_ENCODING_UCS4_2143);
946 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
947 (in[2] == 0x00) && (in[3] == 0x00))
948 return(XML_CHAR_ENCODING_UCS4_3412);
949 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
950 (in[2] == 0xA7) && (in[3] == 0x94))
951 return(XML_CHAR_ENCODING_EBCDIC);
952 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
953 (in[2] == 0x78) && (in[3] == 0x6D))
954 return(XML_CHAR_ENCODING_UTF8);
955 /*
956 * Although not part of the recommendation, we also
957 * attempt an "auto-recognition" of UTF-16LE and
958 * UTF-16BE encodings.
959 */
960 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961 (in[2] == 0x3F) && (in[3] == 0x00))
962 return(XML_CHAR_ENCODING_UTF16LE);
963 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
964 (in[2] == 0x00) && (in[3] == 0x3F))
965 return(XML_CHAR_ENCODING_UTF16BE);
966 }
967 if (len >= 3) {
968 /*
969 * Errata on XML-1.0 June 20 2001
970 * We now allow an UTF8 encoded BOM
971 */
972 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
973 (in[2] == 0xBF))
974 return(XML_CHAR_ENCODING_UTF8);
975 }
976 /* For UTF-16 we can recognize by the BOM */
977 if (len >= 2) {
978 if ((in[0] == 0xFE) && (in[1] == 0xFF))
979 return(XML_CHAR_ENCODING_UTF16BE);
980 if ((in[0] == 0xFF) && (in[1] == 0xFE))
981 return(XML_CHAR_ENCODING_UTF16LE);
982 }
983 return(XML_CHAR_ENCODING_NONE);
984 }
985
986 /**
987 * xmlCleanupEncodingAliases:
988 *
989 * Unregisters all aliases
990 */
991 void
xmlCleanupEncodingAliases(void)992 xmlCleanupEncodingAliases(void) {
993 int i;
994
995 if (xmlCharEncodingAliases == NULL)
996 return;
997
998 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
999 if (xmlCharEncodingAliases[i].name != NULL)
1000 xmlFree((char *) xmlCharEncodingAliases[i].name);
1001 if (xmlCharEncodingAliases[i].alias != NULL)
1002 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1003 }
1004 xmlCharEncodingAliasesNb = 0;
1005 xmlCharEncodingAliasesMax = 0;
1006 xmlFree(xmlCharEncodingAliases);
1007 xmlCharEncodingAliases = NULL;
1008 }
1009
1010 /**
1011 * xmlGetEncodingAlias:
1012 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1013 *
1014 * Lookup an encoding name for the given alias.
1015 *
1016 * Returns NULL if not found, otherwise the original name
1017 */
1018 const char *
xmlGetEncodingAlias(const char * alias)1019 xmlGetEncodingAlias(const char *alias) {
1020 int i;
1021 char upper[100];
1022
1023 if (alias == NULL)
1024 return(NULL);
1025
1026 if (xmlCharEncodingAliases == NULL)
1027 return(NULL);
1028
1029 for (i = 0;i < 99;i++) {
1030 upper[i] = toupper(alias[i]);
1031 if (upper[i] == 0) break;
1032 }
1033 upper[i] = 0;
1034
1035 /*
1036 * Walk down the list looking for a definition of the alias
1037 */
1038 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1039 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1040 return(xmlCharEncodingAliases[i].name);
1041 }
1042 }
1043 return(NULL);
1044 }
1045
1046 /**
1047 * xmlAddEncodingAlias:
1048 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1049 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1050 *
1051 * Registers an alias @alias for an encoding named @name. Existing alias
1052 * will be overwritten.
1053 *
1054 * Returns 0 in case of success, -1 in case of error
1055 */
1056 int
xmlAddEncodingAlias(const char * name,const char * alias)1057 xmlAddEncodingAlias(const char *name, const char *alias) {
1058 int i;
1059 char upper[100];
1060
1061 if ((name == NULL) || (alias == NULL))
1062 return(-1);
1063
1064 for (i = 0;i < 99;i++) {
1065 upper[i] = toupper(alias[i]);
1066 if (upper[i] == 0) break;
1067 }
1068 upper[i] = 0;
1069
1070 if (xmlCharEncodingAliases == NULL) {
1071 xmlCharEncodingAliasesNb = 0;
1072 xmlCharEncodingAliasesMax = 20;
1073 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1074 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1075 if (xmlCharEncodingAliases == NULL)
1076 return(-1);
1077 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1078 xmlCharEncodingAliasesMax *= 2;
1079 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1080 xmlRealloc(xmlCharEncodingAliases,
1081 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1082 }
1083 /*
1084 * Walk down the list looking for a definition of the alias
1085 */
1086 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1087 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1088 /*
1089 * Replace the definition.
1090 */
1091 xmlFree((char *) xmlCharEncodingAliases[i].name);
1092 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1093 return(0);
1094 }
1095 }
1096 /*
1097 * Add the definition
1098 */
1099 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1100 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1101 xmlCharEncodingAliasesNb++;
1102 return(0);
1103 }
1104
1105 /**
1106 * xmlDelEncodingAlias:
1107 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1108 *
1109 * Unregisters an encoding alias @alias
1110 *
1111 * Returns 0 in case of success, -1 in case of error
1112 */
1113 int
xmlDelEncodingAlias(const char * alias)1114 xmlDelEncodingAlias(const char *alias) {
1115 int i;
1116
1117 if (alias == NULL)
1118 return(-1);
1119
1120 if (xmlCharEncodingAliases == NULL)
1121 return(-1);
1122 /*
1123 * Walk down the list looking for a definition of the alias
1124 */
1125 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1126 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1127 xmlFree((char *) xmlCharEncodingAliases[i].name);
1128 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1129 xmlCharEncodingAliasesNb--;
1130 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1131 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1132 return(0);
1133 }
1134 }
1135 return(-1);
1136 }
1137
1138 /**
1139 * xmlParseCharEncoding:
1140 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1141 *
1142 * Compare the string to the encoding schemes already known. Note
1143 * that the comparison is case insensitive accordingly to the section
1144 * [XML] 4.3.3 Character Encoding in Entities.
1145 *
1146 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1147 * if not recognized.
1148 */
1149 xmlCharEncoding
xmlParseCharEncoding(const char * name)1150 xmlParseCharEncoding(const char* name)
1151 {
1152 const char *alias;
1153 char upper[500];
1154 int i;
1155
1156 if (name == NULL)
1157 return(XML_CHAR_ENCODING_NONE);
1158
1159 /*
1160 * Do the alias resolution
1161 */
1162 alias = xmlGetEncodingAlias(name);
1163 if (alias != NULL)
1164 name = alias;
1165
1166 for (i = 0;i < 499;i++) {
1167 upper[i] = toupper(name[i]);
1168 if (upper[i] == 0) break;
1169 }
1170 upper[i] = 0;
1171
1172 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1173 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1174 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1175
1176 /*
1177 * NOTE: if we were able to parse this, the endianness of UTF16 is
1178 * already found and in use
1179 */
1180 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1181 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1182
1183 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1184 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1185 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1186
1187 /*
1188 * NOTE: if we were able to parse this, the endianness of UCS4 is
1189 * already found and in use
1190 */
1191 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1192 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1193 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1194
1195
1196 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1197 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1198 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1199
1200 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1201 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1202 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1203
1204 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1205 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1206 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1207 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1208 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1209 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1210 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1211
1212 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1213 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1214 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1215
1216 #ifdef DEBUG_ENCODING
1217 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1218 #endif
1219 return(XML_CHAR_ENCODING_ERROR);
1220 }
1221
1222 /**
1223 * xmlGetCharEncodingName:
1224 * @enc: the encoding
1225 *
1226 * The "canonical" name for XML encoding.
1227 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1228 * Section 4.3.3 Character Encoding in Entities
1229 *
1230 * Returns the canonical name for the given encoding
1231 */
1232
1233 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1234 xmlGetCharEncodingName(xmlCharEncoding enc) {
1235 switch (enc) {
1236 case XML_CHAR_ENCODING_ERROR:
1237 return(NULL);
1238 case XML_CHAR_ENCODING_NONE:
1239 return(NULL);
1240 case XML_CHAR_ENCODING_UTF8:
1241 return("UTF-8");
1242 case XML_CHAR_ENCODING_UTF16LE:
1243 return("UTF-16");
1244 case XML_CHAR_ENCODING_UTF16BE:
1245 return("UTF-16");
1246 case XML_CHAR_ENCODING_EBCDIC:
1247 return("EBCDIC");
1248 case XML_CHAR_ENCODING_UCS4LE:
1249 return("ISO-10646-UCS-4");
1250 case XML_CHAR_ENCODING_UCS4BE:
1251 return("ISO-10646-UCS-4");
1252 case XML_CHAR_ENCODING_UCS4_2143:
1253 return("ISO-10646-UCS-4");
1254 case XML_CHAR_ENCODING_UCS4_3412:
1255 return("ISO-10646-UCS-4");
1256 case XML_CHAR_ENCODING_UCS2:
1257 return("ISO-10646-UCS-2");
1258 case XML_CHAR_ENCODING_8859_1:
1259 return("ISO-8859-1");
1260 case XML_CHAR_ENCODING_8859_2:
1261 return("ISO-8859-2");
1262 case XML_CHAR_ENCODING_8859_3:
1263 return("ISO-8859-3");
1264 case XML_CHAR_ENCODING_8859_4:
1265 return("ISO-8859-4");
1266 case XML_CHAR_ENCODING_8859_5:
1267 return("ISO-8859-5");
1268 case XML_CHAR_ENCODING_8859_6:
1269 return("ISO-8859-6");
1270 case XML_CHAR_ENCODING_8859_7:
1271 return("ISO-8859-7");
1272 case XML_CHAR_ENCODING_8859_8:
1273 return("ISO-8859-8");
1274 case XML_CHAR_ENCODING_8859_9:
1275 return("ISO-8859-9");
1276 case XML_CHAR_ENCODING_2022_JP:
1277 return("ISO-2022-JP");
1278 case XML_CHAR_ENCODING_SHIFT_JIS:
1279 return("Shift-JIS");
1280 case XML_CHAR_ENCODING_EUC_JP:
1281 return("EUC-JP");
1282 case XML_CHAR_ENCODING_ASCII:
1283 return(NULL);
1284 }
1285 return(NULL);
1286 }
1287
1288 /************************************************************************
1289 * *
1290 * Char encoding handlers *
1291 * *
1292 ************************************************************************/
1293
1294
/*
 * Capacity of the handlers table: xmlInitCharEncodingHandlers() allocates
 * that many slots and xmlRegisterCharEncodingHandler() refuses to register
 * once they are all used.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* Table of registered handlers; NULL until xmlInitCharEncodingHandlers() ran */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of slots of the handlers table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * xmlFindCharEncodingHandler() returns this value when asked for a NULL
 * or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1306
1307 /**
1308 * xmlNewCharEncodingHandler:
1309 * @name: the encoding name, in UTF-8 format (ASCII actually)
1310 * @input: the xmlCharEncodingInputFunc to read that encoding
1311 * @output: the xmlCharEncodingOutputFunc to write that encoding
1312 *
1313 * Create and registers an xmlCharEncodingHandler.
1314 *
1315 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1316 */
1317 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1318 xmlNewCharEncodingHandler(const char *name,
1319 xmlCharEncodingInputFunc input,
1320 xmlCharEncodingOutputFunc output) {
1321 xmlCharEncodingHandlerPtr handler;
1322 const char *alias;
1323 char upper[500];
1324 int i;
1325 char *up = NULL;
1326
1327 /*
1328 * Do the alias resolution
1329 */
1330 alias = xmlGetEncodingAlias(name);
1331 if (alias != NULL)
1332 name = alias;
1333
1334 /*
1335 * Keep only the uppercase version of the encoding.
1336 */
1337 if (name == NULL) {
1338 xmlEncodingErr(XML_I18N_NO_NAME,
1339 "xmlNewCharEncodingHandler : no name !\n", NULL);
1340 return(NULL);
1341 }
1342 for (i = 0;i < 499;i++) {
1343 upper[i] = toupper(name[i]);
1344 if (upper[i] == 0) break;
1345 }
1346 upper[i] = 0;
1347 up = xmlMemStrdup(upper);
1348 if (up == NULL) {
1349 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1350 return(NULL);
1351 }
1352
1353 /*
1354 * allocate and fill-up an handler block.
1355 */
1356 handler = (xmlCharEncodingHandlerPtr)
1357 xmlMalloc(sizeof(xmlCharEncodingHandler));
1358 if (handler == NULL) {
1359 xmlFree(up);
1360 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 return(NULL);
1362 }
1363 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1364 handler->input = input;
1365 handler->output = output;
1366 handler->name = up;
1367
1368 #ifdef LIBXML_ICONV_ENABLED
1369 handler->iconv_in = NULL;
1370 handler->iconv_out = NULL;
1371 #endif
1372 #ifdef LIBXML_ICU_ENABLED
1373 handler->uconv_in = NULL;
1374 handler->uconv_out = NULL;
1375 #endif
1376
1377 /*
1378 * registers and returns the handler.
1379 */
1380 xmlRegisterCharEncodingHandler(handler);
1381 #ifdef DEBUG_ENCODING
1382 xmlGenericError(xmlGenericErrorContext,
1383 "Registered encoding handler for %s\n", name);
1384 #endif
1385 return(handler);
1386 }
1387
1388 /**
1389 * xmlInitCharEncodingHandlers:
1390 *
1391 * Initialize the char encoding support, it registers the default
1392 * encoding supported.
1393 * NOTE: while public, this function usually doesn't need to be called
1394 * in normal processing.
1395 */
1396 void
xmlInitCharEncodingHandlers(void)1397 xmlInitCharEncodingHandlers(void) {
1398 unsigned short int tst = 0x1234;
1399 unsigned char *ptr = (unsigned char *) &tst;
1400
1401 if (handlers != NULL) return;
1402
1403 handlers = (xmlCharEncodingHandlerPtr *)
1404 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1405
1406 if (*ptr == 0x12) xmlLittleEndian = 0;
1407 else if (*ptr == 0x34) xmlLittleEndian = 1;
1408 else {
1409 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1410 "Odd problem at endianness detection\n", NULL);
1411 }
1412
1413 if (handlers == NULL) {
1414 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1415 return;
1416 }
1417 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1418 #ifdef LIBXML_OUTPUT_ENABLED
1419 xmlUTF16LEHandler =
1420 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1421 xmlUTF16BEHandler =
1422 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1423 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1424 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1425 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1426 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1427 #ifdef LIBXML_HTML_ENABLED
1428 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1429 #endif
1430 #else
1431 xmlUTF16LEHandler =
1432 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1433 xmlUTF16BEHandler =
1434 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1435 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1436 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1437 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1438 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1439 #endif /* LIBXML_OUTPUT_ENABLED */
1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1441 #ifdef LIBXML_ISO8859X_ENABLED
1442 xmlRegisterCharEncodingHandlersISO8859x ();
1443 #endif
1444 #endif
1445
1446 }
1447
1448 /**
1449 * xmlCleanupCharEncodingHandlers:
1450 *
1451 * Cleanup the memory allocated for the char encoding support, it
1452 * unregisters all the encoding handlers and the aliases.
1453 */
1454 void
xmlCleanupCharEncodingHandlers(void)1455 xmlCleanupCharEncodingHandlers(void) {
1456 xmlCleanupEncodingAliases();
1457
1458 if (handlers == NULL) return;
1459
1460 for (;nbCharEncodingHandler > 0;) {
1461 nbCharEncodingHandler--;
1462 if (handlers[nbCharEncodingHandler] != NULL) {
1463 if (handlers[nbCharEncodingHandler]->name != NULL)
1464 xmlFree(handlers[nbCharEncodingHandler]->name);
1465 xmlFree(handlers[nbCharEncodingHandler]);
1466 }
1467 }
1468 xmlFree(handlers);
1469 handlers = NULL;
1470 nbCharEncodingHandler = 0;
1471 xmlDefaultCharEncodingHandler = NULL;
1472 }
1473
1474 /**
1475 * xmlRegisterCharEncodingHandler:
1476 * @handler: the xmlCharEncodingHandlerPtr handler block
1477 *
1478 * Register the char encoding handler, surprising, isn't it ?
1479 */
1480 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1482 if (handlers == NULL) xmlInitCharEncodingHandlers();
1483 if ((handler == NULL) || (handlers == NULL)) {
1484 xmlEncodingErr(XML_I18N_NO_HANDLER,
1485 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1486 return;
1487 }
1488
1489 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1490 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1491 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1492 "MAX_ENCODING_HANDLERS");
1493 return;
1494 }
1495 handlers[nbCharEncodingHandler++] = handler;
1496 }
1497
1498 /**
1499 * xmlGetCharEncodingHandler:
1500 * @enc: an xmlCharEncoding value.
1501 *
1502 * Search in the registered set the handler able to read/write that encoding.
1503 *
1504 * Returns the handler or NULL if not found
1505 */
1506 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1507 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1508 xmlCharEncodingHandlerPtr handler;
1509
1510 if (handlers == NULL) xmlInitCharEncodingHandlers();
1511 switch (enc) {
1512 case XML_CHAR_ENCODING_ERROR:
1513 return(NULL);
1514 case XML_CHAR_ENCODING_NONE:
1515 return(NULL);
1516 case XML_CHAR_ENCODING_UTF8:
1517 return(NULL);
1518 case XML_CHAR_ENCODING_UTF16LE:
1519 return(xmlUTF16LEHandler);
1520 case XML_CHAR_ENCODING_UTF16BE:
1521 return(xmlUTF16BEHandler);
1522 case XML_CHAR_ENCODING_EBCDIC:
1523 handler = xmlFindCharEncodingHandler("EBCDIC");
1524 if (handler != NULL) return(handler);
1525 handler = xmlFindCharEncodingHandler("ebcdic");
1526 if (handler != NULL) return(handler);
1527 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1528 if (handler != NULL) return(handler);
1529 handler = xmlFindCharEncodingHandler("IBM-037");
1530 if (handler != NULL) return(handler);
1531 break;
1532 case XML_CHAR_ENCODING_UCS4BE:
1533 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1534 if (handler != NULL) return(handler);
1535 handler = xmlFindCharEncodingHandler("UCS-4");
1536 if (handler != NULL) return(handler);
1537 handler = xmlFindCharEncodingHandler("UCS4");
1538 if (handler != NULL) return(handler);
1539 break;
1540 case XML_CHAR_ENCODING_UCS4LE:
1541 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1542 if (handler != NULL) return(handler);
1543 handler = xmlFindCharEncodingHandler("UCS-4");
1544 if (handler != NULL) return(handler);
1545 handler = xmlFindCharEncodingHandler("UCS4");
1546 if (handler != NULL) return(handler);
1547 break;
1548 case XML_CHAR_ENCODING_UCS4_2143:
1549 break;
1550 case XML_CHAR_ENCODING_UCS4_3412:
1551 break;
1552 case XML_CHAR_ENCODING_UCS2:
1553 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1554 if (handler != NULL) return(handler);
1555 handler = xmlFindCharEncodingHandler("UCS-2");
1556 if (handler != NULL) return(handler);
1557 handler = xmlFindCharEncodingHandler("UCS2");
1558 if (handler != NULL) return(handler);
1559 break;
1560
1561 /*
1562 * We used to keep ISO Latin encodings native in the
1563 * generated data. This led to so many problems that
1564 * this has been removed. One can still change this
1565 * back by registering no-ops encoders for those
1566 */
1567 case XML_CHAR_ENCODING_8859_1:
1568 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1569 if (handler != NULL) return(handler);
1570 break;
1571 case XML_CHAR_ENCODING_8859_2:
1572 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1573 if (handler != NULL) return(handler);
1574 break;
1575 case XML_CHAR_ENCODING_8859_3:
1576 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1577 if (handler != NULL) return(handler);
1578 break;
1579 case XML_CHAR_ENCODING_8859_4:
1580 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1581 if (handler != NULL) return(handler);
1582 break;
1583 case XML_CHAR_ENCODING_8859_5:
1584 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1585 if (handler != NULL) return(handler);
1586 break;
1587 case XML_CHAR_ENCODING_8859_6:
1588 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1589 if (handler != NULL) return(handler);
1590 break;
1591 case XML_CHAR_ENCODING_8859_7:
1592 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1593 if (handler != NULL) return(handler);
1594 break;
1595 case XML_CHAR_ENCODING_8859_8:
1596 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1597 if (handler != NULL) return(handler);
1598 break;
1599 case XML_CHAR_ENCODING_8859_9:
1600 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1601 if (handler != NULL) return(handler);
1602 break;
1603
1604
1605 case XML_CHAR_ENCODING_2022_JP:
1606 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1607 if (handler != NULL) return(handler);
1608 break;
1609 case XML_CHAR_ENCODING_SHIFT_JIS:
1610 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1611 if (handler != NULL) return(handler);
1612 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1613 if (handler != NULL) return(handler);
1614 handler = xmlFindCharEncodingHandler("Shift_JIS");
1615 if (handler != NULL) return(handler);
1616 break;
1617 case XML_CHAR_ENCODING_EUC_JP:
1618 handler = xmlFindCharEncodingHandler("EUC-JP");
1619 if (handler != NULL) return(handler);
1620 break;
1621 default:
1622 break;
1623 }
1624
1625 #ifdef DEBUG_ENCODING
1626 xmlGenericError(xmlGenericErrorContext,
1627 "No handler found for encoding %d\n", enc);
1628 #endif
1629 return(NULL);
1630 }
1631
1632 /**
1633 * xmlFindCharEncodingHandler:
1634 * @name: a string describing the char encoding.
1635 *
1636 * Search in the registered set the handler able to read/write that encoding.
1637 *
1638 * Returns the handler or NULL if not found
1639 */
1640 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1641 xmlFindCharEncodingHandler(const char *name) {
1642 const char *nalias;
1643 const char *norig;
1644 xmlCharEncoding alias;
1645 #ifdef LIBXML_ICONV_ENABLED
1646 xmlCharEncodingHandlerPtr enc;
1647 iconv_t icv_in, icv_out;
1648 #endif /* LIBXML_ICONV_ENABLED */
1649 #ifdef LIBXML_ICU_ENABLED
1650 xmlCharEncodingHandlerPtr encu;
1651 uconv_t *ucv_in, *ucv_out;
1652 #endif /* LIBXML_ICU_ENABLED */
1653 char upper[100];
1654 int i;
1655
1656 if (handlers == NULL) xmlInitCharEncodingHandlers();
1657 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1658 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1659
1660 /*
1661 * Do the alias resolution
1662 */
1663 norig = name;
1664 nalias = xmlGetEncodingAlias(name);
1665 if (nalias != NULL)
1666 name = nalias;
1667
1668 /*
1669 * Check first for directly registered encoding names
1670 */
1671 for (i = 0;i < 99;i++) {
1672 upper[i] = toupper(name[i]);
1673 if (upper[i] == 0) break;
1674 }
1675 upper[i] = 0;
1676
1677 if (handlers != NULL) {
1678 for (i = 0;i < nbCharEncodingHandler; i++) {
1679 if (!strcmp(upper, handlers[i]->name)) {
1680 #ifdef DEBUG_ENCODING
1681 xmlGenericError(xmlGenericErrorContext,
1682 "Found registered handler for encoding %s\n", name);
1683 #endif
1684 return(handlers[i]);
1685 }
1686 }
1687 }
1688
1689 #ifdef LIBXML_ICONV_ENABLED
1690 /* check whether iconv can handle this */
1691 icv_in = iconv_open("UTF-8", name);
1692 icv_out = iconv_open(name, "UTF-8");
1693 if (icv_in == (iconv_t) -1) {
1694 icv_in = iconv_open("UTF-8", upper);
1695 }
1696 if (icv_out == (iconv_t) -1) {
1697 icv_out = iconv_open(upper, "UTF-8");
1698 }
1699 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1700 enc = (xmlCharEncodingHandlerPtr)
1701 xmlMalloc(sizeof(xmlCharEncodingHandler));
1702 if (enc == NULL) {
1703 iconv_close(icv_in);
1704 iconv_close(icv_out);
1705 return(NULL);
1706 }
1707 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1708 enc->name = xmlMemStrdup(name);
1709 enc->input = NULL;
1710 enc->output = NULL;
1711 enc->iconv_in = icv_in;
1712 enc->iconv_out = icv_out;
1713 #ifdef DEBUG_ENCODING
1714 xmlGenericError(xmlGenericErrorContext,
1715 "Found iconv handler for encoding %s\n", name);
1716 #endif
1717 return enc;
1718 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1719 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1720 "iconv : problems with filters for '%s'\n", name);
1721 }
1722 #endif /* LIBXML_ICONV_ENABLED */
1723 #ifdef LIBXML_ICU_ENABLED
1724 /* check whether icu can handle this */
1725 ucv_in = openIcuConverter(name, 1);
1726 ucv_out = openIcuConverter(name, 0);
1727 if (ucv_in != NULL && ucv_out != NULL) {
1728 encu = (xmlCharEncodingHandlerPtr)
1729 xmlMalloc(sizeof(xmlCharEncodingHandler));
1730 if (encu == NULL) {
1731 closeIcuConverter(ucv_in);
1732 closeIcuConverter(ucv_out);
1733 return(NULL);
1734 }
1735 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1736 encu->name = xmlMemStrdup(name);
1737 encu->input = NULL;
1738 encu->output = NULL;
1739 encu->uconv_in = ucv_in;
1740 encu->uconv_out = ucv_out;
1741 #ifdef DEBUG_ENCODING
1742 xmlGenericError(xmlGenericErrorContext,
1743 "Found ICU converter handler for encoding %s\n", name);
1744 #endif
1745 return encu;
1746 } else if (ucv_in != NULL || ucv_out != NULL) {
1747 closeIcuConverter(ucv_in);
1748 closeIcuConverter(ucv_out);
1749 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1750 "ICU converter : problems with filters for '%s'\n", name);
1751 }
1752 #endif /* LIBXML_ICU_ENABLED */
1753
1754 #ifdef DEBUG_ENCODING
1755 xmlGenericError(xmlGenericErrorContext,
1756 "No handler found for encoding %s\n", name);
1757 #endif
1758
1759 /*
1760 * Fallback using the canonical names
1761 */
1762 alias = xmlParseCharEncoding(norig);
1763 if (alias != XML_CHAR_ENCODING_ERROR) {
1764 const char* canon;
1765 canon = xmlGetCharEncodingName(alias);
1766 if ((canon != NULL) && (strcmp(name, canon))) {
1767 return(xmlFindCharEncodingHandler(canon));
1768 }
1769 }
1770
1771 /* If "none of the above", give up */
1772 return(NULL);
1773 }
1774
1775 /************************************************************************
1776 * *
1777 * ICONV based generic conversion functions *
1778 * *
1779 ************************************************************************/
1780
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:	a pointer to an array of bytes to store the result
 * @outlen:	the length of @out
 * @in:		a pointer to an array of input bytes
 * @inlen:	the length of @in
 *
 * Runs one iconv() conversion step from @in to @out and maps the
 * outcome to the return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (E2BIG) or if one of the pointers is NULL, or
 *     -2 if the transcoding fails (EILSEQ: the input is not valid or
 *        cannot be represented in the target encoding), or
 *     -3 for a truncated input sequence (EINVAL) or any other failure.
 *
 * After a conversion attempt the value of @inlen is the number of octets
 * consumed and the value of @outlen is the number of octets produced.
 * When a pointer is NULL, @outlen is set to 0 (if it can be) and @inlen
 * is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft;
    size_t dstLeft;
    int rc;

    if (outlen == NULL)
        return(-1);
    if ((out == NULL) || (inlen == NULL) || (in == NULL)) {
        *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);
    /* iconv() counts down what is left; turn that into what was used */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if ((srcLeft == 0) && (rc != -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and every other error end up here */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1840
1841 /************************************************************************
1842 * *
1843 * ICU based generic conversion functions *
1844 * *
1845 ************************************************************************/
1846
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
 * @out: a pointer to an array of bytes to store the result
 * @outlen: the length of @out
 * @in: a pointer to an array of input bytes
 * @inlen: the length of @in
 * @flush: if true, indicates end of input
 *
 * Runs one ucnv_convertEx() step through the pivot buffer of @cd and maps
 * the ICU status to the return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (U_BUFFER_OVERFLOW_ERROR) or if one of the
 *        pointers is NULL, or
 *     -2 if the transcoding fails (U_INVALID_CHAR_FOUND or
 *        U_ILLEGAL_CHAR_FOUND), or
 *     -3 for any other ICU failure.
 *
 * After a conversion attempt the value of @inlen is the number of octets
 * consumed and the value of @outlen is the number of octets produced.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UConverter *target;
    UConverter *source;
    UErrorCode status = U_ZERO_ERROR;

    if (outlen == NULL)
        return(-1);
    if ((out == NULL) || (inlen == NULL) || (in == NULL)) {
        *outlen = 0;
        return(-1);
    }

    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        target = cd->utf8;
        source = cd->uconv;
    } else {
        /* UTF-8 => UTF-16 => encoding */
        target = cd->uconv;
        source = cd->utf8;
    }
    ucnv_convertEx(target, source, &dst, dst + *outlen,
                   &src, src + *inlen, cd->pivot_buf,
                   &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &status);

    /* both cursors were advanced by ICU: report how far they moved */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (!U_SUCCESS(status)) {
        if (status == U_BUFFER_OVERFLOW_ERROR)
            return -1;
        if (status == U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND)
            return -2;
        return -3;
    }

    /* reset pivot buf if this is the last call for input (flush==TRUE) */
    if (flush)
        cd->pivot_source = cd->pivot_target = cd->pivot_buf;
    return 0;
}
#endif /* LIBXML_ICU_ENABLED */
1908
1909 /************************************************************************
1910 * *
1911 * The real API used by libxml for on-the-fly conversion *
1912 * *
1913 ************************************************************************/
1914
1915 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1916 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1917 int *outlen, const unsigned char *in, int *inlen, int flush) {
1918 int ret;
1919 (void)flush;
1920
1921 if (handler->input != NULL) {
1922 ret = handler->input(out, outlen, in, inlen);
1923 }
1924 #ifdef LIBXML_ICONV_ENABLED
1925 else if (handler->iconv_in != NULL) {
1926 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1927 }
1928 #endif /* LIBXML_ICONV_ENABLED */
1929 #ifdef LIBXML_ICU_ENABLED
1930 else if (handler->uconv_in != NULL) {
1931 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1932 flush);
1933 }
1934 #endif /* LIBXML_ICU_ENABLED */
1935 else {
1936 *outlen = 0;
1937 *inlen = 0;
1938 ret = -2;
1939 }
1940
1941 return(ret);
1942 }
1943
1944 /* Returns -4 if no output function was found. */
1945 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1946 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1947 int *outlen, const unsigned char *in, int *inlen) {
1948 int ret;
1949
1950 if (handler->output != NULL) {
1951 ret = handler->output(out, outlen, in, inlen);
1952 }
1953 #ifdef LIBXML_ICONV_ENABLED
1954 else if (handler->iconv_out != NULL) {
1955 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1956 }
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959 else if (handler->uconv_out != NULL) {
1960 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
1961 TRUE);
1962 }
1963 #endif /* LIBXML_ICU_ENABLED */
1964 else {
1965 *outlen = 0;
1966 *inlen = 0;
1967 ret = -4;
1968 }
1969
1970 return(ret);
1971 }
1972
1973 /**
1974 * xmlCharEncFirstLineInt:
1975 * @handler: char enconding transformation data structure
1976 * @out: an xmlBuffer for the output.
1977 * @in: an xmlBuffer for the input
1978 * @len: number of bytes to convert for the first line, or -1
1979 *
1980 * Front-end for the encoding handler input function, but handle only
1981 * the very first line, i.e. limit itself to 45 chars.
1982 *
1983 * Returns the number of byte written if success, or
1984 * -1 general error
1985 * -2 if the transcoding fails (for *in is not valid utf8 string or
1986 * the result of transformation can't fit into the encoding we want), or
1987 */
1988 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1990 xmlBufferPtr in, int len) {
1991 int ret;
1992 int written;
1993 int toconv;
1994
1995 if (handler == NULL) return(-1);
1996 if (out == NULL) return(-1);
1997 if (in == NULL) return(-1);
1998
1999 /* calculate space available */
2000 written = out->size - out->use - 1; /* count '\0' */
2001 toconv = in->use;
2002 /*
2003 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2004 * 45 chars should be sufficient to reach the end of the encoding
2005 * declaration without going too far inside the document content.
2006 * on UTF-16 this means 90bytes, on UCS4 this means 180
2007 * The actual value depending on guessed encoding is passed as @len
2008 * if provided
2009 */
2010 if (len >= 0) {
2011 if (toconv > len)
2012 toconv = len;
2013 } else {
2014 if (toconv > 180)
2015 toconv = 180;
2016 }
2017 if (toconv * 2 >= written) {
2018 xmlBufferGrow(out, toconv * 2);
2019 written = out->size - out->use - 1;
2020 }
2021
2022 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2023 in->content, &toconv, 0);
2024 xmlBufferShrink(in, toconv);
2025 out->use += written;
2026 out->content[out->use] = 0;
2027 if (ret == -1) ret = -3;
2028
2029 #ifdef DEBUG_ENCODING
2030 switch (ret) {
2031 case 0:
2032 xmlGenericError(xmlGenericErrorContext,
2033 "converted %d bytes to %d bytes of input\n",
2034 toconv, written);
2035 break;
2036 case -1:
2037 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2038 toconv, written, in->use);
2039 break;
2040 case -2:
2041 xmlGenericError(xmlGenericErrorContext,
2042 "input conversion failed due to input error\n");
2043 break;
2044 case -3:
2045 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2046 toconv, written, in->use);
2047 break;
2048 default:
2049 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2050 }
2051 #endif /* DEBUG_ENCODING */
2052 /*
2053 * Ignore when input buffer is not on a boundary
2054 */
2055 if (ret == -3) ret = 0;
2056 if (ret == -1) ret = 0;
2057 return(ret);
2058 }
2059
2060 /**
2061 * xmlCharEncFirstLine:
2062 * @handler: char enconding transformation data structure
2063 * @out: an xmlBuffer for the output.
2064 * @in: an xmlBuffer for the input
2065 *
2066 * Front-end for the encoding handler input function, but handle only
2067 * the very first line, i.e. limit itself to 45 chars.
2068 *
2069 * Returns the number of byte written if success, or
2070 * -1 general error
2071 * -2 if the transcoding fails (for *in is not valid utf8 string or
2072 * the result of transformation can't fit into the encoding we want), or
2073 */
2074 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2075 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2076 xmlBufferPtr in) {
2077 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2078 }
2079
2080 /**
2081 * xmlCharEncFirstLineInput:
2082 * @input: a parser input buffer
2083 * @len: number of bytes to convert for the first line, or -1
2084 *
2085 * Front-end for the encoding handler input function, but handle only
2086 * the very first line. Point is that this is based on autodetection
2087 * of the encoding and once that first line is converted we may find
2088 * out that a different decoder is needed to process the input.
2089 *
2090 * Returns the number of byte written if success, or
2091 * -1 general error
2092 * -2 if the transcoding fails (for *in is not valid utf8 string or
2093 * the result of transformation can't fit into the encoding we want), or
2094 */
2095 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2097 {
2098 int ret;
2099 size_t written;
2100 size_t toconv;
2101 int c_in;
2102 int c_out;
2103 xmlBufPtr in;
2104 xmlBufPtr out;
2105
2106 if ((input == NULL) || (input->encoder == NULL) ||
2107 (input->buffer == NULL) || (input->raw == NULL))
2108 return (-1);
2109 out = input->buffer;
2110 in = input->raw;
2111
2112 toconv = xmlBufUse(in);
2113 if (toconv == 0)
2114 return (0);
2115 written = xmlBufAvail(out) - 1; /* count '\0' */
2116 /*
2117 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2118 * 45 chars should be sufficient to reach the end of the encoding
2119 * declaration without going too far inside the document content.
2120 * on UTF-16 this means 90bytes, on UCS4 this means 180
2121 * The actual value depending on guessed encoding is passed as @len
2122 * if provided
2123 */
2124 if (len >= 0) {
2125 if (toconv > (unsigned int) len)
2126 toconv = len;
2127 } else {
2128 if (toconv > 180)
2129 toconv = 180;
2130 }
2131 if (toconv * 2 >= written) {
2132 xmlBufGrow(out, toconv * 2);
2133 written = xmlBufAvail(out) - 1;
2134 }
2135 if (written > 360)
2136 written = 360;
2137
2138 c_in = toconv;
2139 c_out = written;
2140 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2141 xmlBufContent(in), &c_in, 0);
2142 xmlBufShrink(in, c_in);
2143 xmlBufAddLen(out, c_out);
2144 if (ret == -1)
2145 ret = -3;
2146
2147 switch (ret) {
2148 case 0:
2149 #ifdef DEBUG_ENCODING
2150 xmlGenericError(xmlGenericErrorContext,
2151 "converted %d bytes to %d bytes of input\n",
2152 c_in, c_out);
2153 #endif
2154 break;
2155 case -1:
2156 #ifdef DEBUG_ENCODING
2157 xmlGenericError(xmlGenericErrorContext,
2158 "converted %d bytes to %d bytes of input, %d left\n",
2159 c_in, c_out, (int)xmlBufUse(in));
2160 #endif
2161 break;
2162 case -3:
2163 #ifdef DEBUG_ENCODING
2164 xmlGenericError(xmlGenericErrorContext,
2165 "converted %d bytes to %d bytes of input, %d left\n",
2166 c_in, c_out, (int)xmlBufUse(in));
2167 #endif
2168 break;
2169 case -2: {
2170 char buf[50];
2171 const xmlChar *content = xmlBufContent(in);
2172
2173 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2174 content[0], content[1],
2175 content[2], content[3]);
2176 buf[49] = 0;
2177 xmlEncodingErr(XML_I18N_CONV_FAILED,
2178 "input conversion failed due to input error, bytes %s\n",
2179 buf);
2180 }
2181 }
2182 /*
2183 * Ignore when input buffer is not on a boundary
2184 */
2185 if (ret == -3) ret = 0;
2186 if (ret == -1) ret = 0;
2187 return(ret);
2188 }
2189
2190 /**
2191 * xmlCharEncInput:
2192 * @input: a parser input buffer
2193 * @flush: try to flush all the raw buffer
2194 *
2195 * Generic front-end for the encoding handler on parser input
2196 *
2197 * Returns the number of byte written if success, or
2198 * -1 general error
2199 * -2 if the transcoding fails (for *in is not valid utf8 string or
2200 * the result of transformation can't fit into the encoding we want), or
2201 */
2202 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2203 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2204 {
2205 int ret;
2206 size_t written;
2207 size_t toconv;
2208 int c_in;
2209 int c_out;
2210 xmlBufPtr in;
2211 xmlBufPtr out;
2212
2213 if ((input == NULL) || (input->encoder == NULL) ||
2214 (input->buffer == NULL) || (input->raw == NULL))
2215 return (-1);
2216 out = input->buffer;
2217 in = input->raw;
2218
2219 toconv = xmlBufUse(in);
2220 if (toconv == 0)
2221 return (0);
2222 if ((toconv > 64 * 1024) && (flush == 0))
2223 toconv = 64 * 1024;
2224 written = xmlBufAvail(out);
2225 if (written > 0)
2226 written--; /* count '\0' */
2227 if (toconv * 2 >= written) {
2228 xmlBufGrow(out, toconv * 2);
2229 written = xmlBufAvail(out);
2230 if (written > 0)
2231 written--; /* count '\0' */
2232 }
2233 if ((written > 128 * 1024) && (flush == 0))
2234 written = 128 * 1024;
2235
2236 c_in = toconv;
2237 c_out = written;
2238 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2239 xmlBufContent(in), &c_in, flush);
2240 xmlBufShrink(in, c_in);
2241 xmlBufAddLen(out, c_out);
2242 if (ret == -1)
2243 ret = -3;
2244
2245 switch (ret) {
2246 case 0:
2247 #ifdef DEBUG_ENCODING
2248 xmlGenericError(xmlGenericErrorContext,
2249 "converted %d bytes to %d bytes of input\n",
2250 c_in, c_out);
2251 #endif
2252 break;
2253 case -1:
2254 #ifdef DEBUG_ENCODING
2255 xmlGenericError(xmlGenericErrorContext,
2256 "converted %d bytes to %d bytes of input, %d left\n",
2257 c_in, c_out, (int)xmlBufUse(in));
2258 #endif
2259 break;
2260 case -3:
2261 #ifdef DEBUG_ENCODING
2262 xmlGenericError(xmlGenericErrorContext,
2263 "converted %d bytes to %d bytes of input, %d left\n",
2264 c_in, c_out, (int)xmlBufUse(in));
2265 #endif
2266 break;
2267 case -2: {
2268 char buf[50];
2269 const xmlChar *content = xmlBufContent(in);
2270
2271 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2272 content[0], content[1],
2273 content[2], content[3]);
2274 buf[49] = 0;
2275 xmlEncodingErr(XML_I18N_CONV_FAILED,
2276 "input conversion failed due to input error, bytes %s\n",
2277 buf);
2278 }
2279 }
2280 /*
2281 * Ignore when input buffer is not on a boundary
2282 */
2283 if (ret == -3)
2284 ret = 0;
2285 return (c_out? c_out : ret);
2286 }
2287
2288 /**
2289 * xmlCharEncInFunc:
2290 * @handler: char encoding transformation data structure
2291 * @out: an xmlBuffer for the output.
2292 * @in: an xmlBuffer for the input
2293 *
2294 * Generic front-end for the encoding handler input function
2295 *
2296 * Returns the number of byte written if success, or
2297 * -1 general error
2298 * -2 if the transcoding fails (for *in is not valid utf8 string or
2299 * the result of transformation can't fit into the encoding we want), or
2300 */
2301 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2302 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2303 xmlBufferPtr in)
2304 {
2305 int ret;
2306 int written;
2307 int toconv;
2308
2309 if (handler == NULL)
2310 return (-1);
2311 if (out == NULL)
2312 return (-1);
2313 if (in == NULL)
2314 return (-1);
2315
2316 toconv = in->use;
2317 if (toconv == 0)
2318 return (0);
2319 written = out->size - out->use -1; /* count '\0' */
2320 if (toconv * 2 >= written) {
2321 xmlBufferGrow(out, out->size + toconv * 2);
2322 written = out->size - out->use - 1;
2323 }
2324 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2325 in->content, &toconv, 1);
2326 xmlBufferShrink(in, toconv);
2327 out->use += written;
2328 out->content[out->use] = 0;
2329 if (ret == -1)
2330 ret = -3;
2331
2332 switch (ret) {
2333 case 0:
2334 #ifdef DEBUG_ENCODING
2335 xmlGenericError(xmlGenericErrorContext,
2336 "converted %d bytes to %d bytes of input\n",
2337 toconv, written);
2338 #endif
2339 break;
2340 case -1:
2341 #ifdef DEBUG_ENCODING
2342 xmlGenericError(xmlGenericErrorContext,
2343 "converted %d bytes to %d bytes of input, %d left\n",
2344 toconv, written, in->use);
2345 #endif
2346 break;
2347 case -3:
2348 #ifdef DEBUG_ENCODING
2349 xmlGenericError(xmlGenericErrorContext,
2350 "converted %d bytes to %d bytes of input, %d left\n",
2351 toconv, written, in->use);
2352 #endif
2353 break;
2354 case -2: {
2355 char buf[50];
2356
2357 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2358 in->content[0], in->content[1],
2359 in->content[2], in->content[3]);
2360 buf[49] = 0;
2361 xmlEncodingErr(XML_I18N_CONV_FAILED,
2362 "input conversion failed due to input error, bytes %s\n",
2363 buf);
2364 }
2365 }
2366 /*
2367 * Ignore when input buffer is not on a boundary
2368 */
2369 if (ret == -3)
2370 ret = 0;
2371 return (written? written : ret);
2372 }
2373
2374 #ifdef LIBXML_OUTPUT_ENABLED
2375 /**
2376 * xmlCharEncOutput:
2377 * @output: a parser output buffer
2378 * @init: is this an initialization call without data
2379 *
2380 * Generic front-end for the encoding handler on parser output
2381 * a first call with @init == 1 has to be made first to initiate the
2382 * output in case of non-stateless encoding needing to initiate their
2383 * state or the output (like the BOM in UTF16).
2384 * In case of UTF8 sequence conversion errors for the given encoder,
2385 * the content will be automatically remapped to a CharRef sequence.
2386 *
2387 * Returns the number of byte written if success, or
2388 * -1 general error
2389 * -2 if the transcoding fails (for *in is not valid utf8 string or
2390 * the result of transformation can't fit into the encoding we want), or
2391 */
2392 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2393 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2394 {
2395 int ret;
2396 size_t written;
2397 size_t writtentot = 0;
2398 size_t toconv;
2399 int c_in;
2400 int c_out;
2401 xmlBufPtr in;
2402 xmlBufPtr out;
2403
2404 if ((output == NULL) || (output->encoder == NULL) ||
2405 (output->buffer == NULL) || (output->conv == NULL))
2406 return (-1);
2407 out = output->conv;
2408 in = output->buffer;
2409
2410 retry:
2411
2412 written = xmlBufAvail(out);
2413 if (written > 0)
2414 written--; /* count '\0' */
2415
2416 /*
2417 * First specific handling of the initialization call
2418 */
2419 if (init) {
2420 c_in = 0;
2421 c_out = written;
2422 /* TODO: Check return value. */
2423 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2424 NULL, &c_in);
2425 xmlBufAddLen(out, c_out);
2426 #ifdef DEBUG_ENCODING
2427 xmlGenericError(xmlGenericErrorContext,
2428 "initialized encoder\n");
2429 #endif
2430 return(0);
2431 }
2432
2433 /*
2434 * Conversion itself.
2435 */
2436 toconv = xmlBufUse(in);
2437 if (toconv == 0)
2438 return (0);
2439 if (toconv > 64 * 1024)
2440 toconv = 64 * 1024;
2441 if (toconv * 4 >= written) {
2442 xmlBufGrow(out, toconv * 4);
2443 written = xmlBufAvail(out) - 1;
2444 }
2445 if (written > 256 * 1024)
2446 written = 256 * 1024;
2447
2448 c_in = toconv;
2449 c_out = written;
2450 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2451 xmlBufContent(in), &c_in);
2452 xmlBufShrink(in, c_in);
2453 xmlBufAddLen(out, c_out);
2454 writtentot += c_out;
2455 if (ret == -1) {
2456 if (c_out > 0) {
2457 /* Can be a limitation of iconv or uconv */
2458 goto retry;
2459 }
2460 ret = -3;
2461 }
2462
2463 /*
2464 * Attempt to handle error cases
2465 */
2466 switch (ret) {
2467 case 0:
2468 #ifdef DEBUG_ENCODING
2469 xmlGenericError(xmlGenericErrorContext,
2470 "converted %d bytes to %d bytes of output\n",
2471 c_in, c_out);
2472 #endif
2473 break;
2474 case -1:
2475 #ifdef DEBUG_ENCODING
2476 xmlGenericError(xmlGenericErrorContext,
2477 "output conversion failed by lack of space\n");
2478 #endif
2479 break;
2480 case -3:
2481 #ifdef DEBUG_ENCODING
2482 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2483 c_in, c_out, (int) xmlBufUse(in));
2484 #endif
2485 break;
2486 case -4:
2487 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2488 "xmlCharEncOutFunc: no output function !\n", NULL);
2489 ret = -1;
2490 break;
2491 case -2: {
2492 xmlChar charref[20];
2493 int len = (int) xmlBufUse(in);
2494 xmlChar *content = xmlBufContent(in);
2495 int cur, charrefLen;
2496
2497 cur = xmlGetUTF8Char(content, &len);
2498 if (cur <= 0)
2499 break;
2500
2501 #ifdef DEBUG_ENCODING
2502 xmlGenericError(xmlGenericErrorContext,
2503 "handling output conversion error\n");
2504 xmlGenericError(xmlGenericErrorContext,
2505 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2506 content[0], content[1],
2507 content[2], content[3]);
2508 #endif
2509 /*
2510 * Removes the UTF8 sequence, and replace it by a charref
2511 * and continue the transcoding phase, hoping the error
2512 * did not mangle the encoder state.
2513 */
2514 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2515 "&#%d;", cur);
2516 xmlBufShrink(in, len);
2517 xmlBufGrow(out, charrefLen * 4);
2518 c_out = xmlBufAvail(out) - 1;
2519 c_in = charrefLen;
2520 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2521 charref, &c_in);
2522
2523 if ((ret < 0) || (c_in != charrefLen)) {
2524 char buf[50];
2525
2526 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2527 content[0], content[1],
2528 content[2], content[3]);
2529 buf[49] = 0;
2530 xmlEncodingErr(XML_I18N_CONV_FAILED,
2531 "output conversion failed due to conv error, bytes %s\n",
2532 buf);
2533 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2534 content[0] = ' ';
2535 break;
2536 }
2537
2538 xmlBufAddLen(out, c_out);
2539 writtentot += c_out;
2540 goto retry;
2541 }
2542 }
2543 return(ret);
2544 }
2545 #endif
2546
2547 /**
2548 * xmlCharEncOutFunc:
2549 * @handler: char enconding transformation data structure
2550 * @out: an xmlBuffer for the output.
2551 * @in: an xmlBuffer for the input
2552 *
2553 * Generic front-end for the encoding handler output function
2554 * a first call with @in == NULL has to be made firs to initiate the
2555 * output in case of non-stateless encoding needing to initiate their
2556 * state or the output (like the BOM in UTF16).
2557 * In case of UTF8 sequence conversion errors for the given encoder,
2558 * the content will be automatically remapped to a CharRef sequence.
2559 *
2560 * Returns the number of byte written if success, or
2561 * -1 general error
2562 * -2 if the transcoding fails (for *in is not valid utf8 string or
2563 * the result of transformation can't fit into the encoding we want), or
2564 */
2565 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2566 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2567 xmlBufferPtr in) {
2568 int ret;
2569 int written;
2570 int writtentot = 0;
2571 int toconv;
2572 int output = 0;
2573
2574 if (handler == NULL) return(-1);
2575 if (out == NULL) return(-1);
2576
2577 retry:
2578
2579 written = out->size - out->use;
2580
2581 if (written > 0)
2582 written--; /* Gennady: count '/0' */
2583
2584 /*
2585 * First specific handling of in = NULL, i.e. the initialization call
2586 */
2587 if (in == NULL) {
2588 toconv = 0;
2589 /* TODO: Check return value. */
2590 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2591 NULL, &toconv);
2592 out->use += written;
2593 out->content[out->use] = 0;
2594 #ifdef DEBUG_ENCODING
2595 xmlGenericError(xmlGenericErrorContext,
2596 "initialized encoder\n");
2597 #endif
2598 return(0);
2599 }
2600
2601 /*
2602 * Conversion itself.
2603 */
2604 toconv = in->use;
2605 if (toconv == 0)
2606 return(0);
2607 if (toconv * 4 >= written) {
2608 xmlBufferGrow(out, toconv * 4);
2609 written = out->size - out->use - 1;
2610 }
2611 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2612 in->content, &toconv);
2613 xmlBufferShrink(in, toconv);
2614 out->use += written;
2615 writtentot += written;
2616 out->content[out->use] = 0;
2617 if (ret == -1) {
2618 if (written > 0) {
2619 /* Can be a limitation of iconv or uconv */
2620 goto retry;
2621 }
2622 ret = -3;
2623 }
2624
2625 if (ret >= 0) output += ret;
2626
2627 /*
2628 * Attempt to handle error cases
2629 */
2630 switch (ret) {
2631 case 0:
2632 #ifdef DEBUG_ENCODING
2633 xmlGenericError(xmlGenericErrorContext,
2634 "converted %d bytes to %d bytes of output\n",
2635 toconv, written);
2636 #endif
2637 break;
2638 case -1:
2639 #ifdef DEBUG_ENCODING
2640 xmlGenericError(xmlGenericErrorContext,
2641 "output conversion failed by lack of space\n");
2642 #endif
2643 break;
2644 case -3:
2645 #ifdef DEBUG_ENCODING
2646 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2647 toconv, written, in->use);
2648 #endif
2649 break;
2650 case -4:
2651 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2652 "xmlCharEncOutFunc: no output function !\n", NULL);
2653 ret = -1;
2654 break;
2655 case -2: {
2656 xmlChar charref[20];
2657 int len = in->use;
2658 const xmlChar *utf = (const xmlChar *) in->content;
2659 int cur, charrefLen;
2660
2661 cur = xmlGetUTF8Char(utf, &len);
2662 if (cur <= 0)
2663 break;
2664
2665 #ifdef DEBUG_ENCODING
2666 xmlGenericError(xmlGenericErrorContext,
2667 "handling output conversion error\n");
2668 xmlGenericError(xmlGenericErrorContext,
2669 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2670 in->content[0], in->content[1],
2671 in->content[2], in->content[3]);
2672 #endif
2673 /*
2674 * Removes the UTF8 sequence, and replace it by a charref
2675 * and continue the transcoding phase, hoping the error
2676 * did not mangle the encoder state.
2677 */
2678 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2679 "&#%d;", cur);
2680 xmlBufferShrink(in, len);
2681 xmlBufferGrow(out, charrefLen * 4);
2682 written = out->size - out->use - 1;
2683 toconv = charrefLen;
2684 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2685 charref, &toconv);
2686
2687 if ((ret < 0) || (toconv != charrefLen)) {
2688 char buf[50];
2689
2690 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2691 in->content[0], in->content[1],
2692 in->content[2], in->content[3]);
2693 buf[49] = 0;
2694 xmlEncodingErr(XML_I18N_CONV_FAILED,
2695 "output conversion failed due to conv error, bytes %s\n",
2696 buf);
2697 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2698 in->content[0] = ' ';
2699 break;
2700 }
2701
2702 out->use += written;
2703 writtentot += written;
2704 out->content[out->use] = 0;
2705 goto retry;
2706 }
2707 }
2708 return(ret);
2709 }
2710
2711 /**
2712 * xmlCharEncCloseFunc:
2713 * @handler: char enconding transformation data structure
2714 *
2715 * Generic front-end for encoding handler close function
2716 *
2717 * Returns 0 if success, or -1 in case of error
2718 */
2719 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2720 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2721 int ret = 0;
2722 int tofree = 0;
2723 int i, handler_in_list = 0;
2724
2725 if (handler == NULL) return(-1);
2726 if (handler->name == NULL) return(-1);
2727 if (handlers != NULL) {
2728 for (i = 0;i < nbCharEncodingHandler; i++) {
2729 if (handler == handlers[i]) {
2730 handler_in_list = 1;
2731 break;
2732 }
2733 }
2734 }
2735 #ifdef LIBXML_ICONV_ENABLED
2736 /*
2737 * Iconv handlers can be used only once, free the whole block.
2738 * and the associated icon resources.
2739 */
2740 if ((handler_in_list == 0) &&
2741 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2742 tofree = 1;
2743 if (handler->iconv_out != NULL) {
2744 if (iconv_close(handler->iconv_out))
2745 ret = -1;
2746 handler->iconv_out = NULL;
2747 }
2748 if (handler->iconv_in != NULL) {
2749 if (iconv_close(handler->iconv_in))
2750 ret = -1;
2751 handler->iconv_in = NULL;
2752 }
2753 }
2754 #endif /* LIBXML_ICONV_ENABLED */
2755 #ifdef LIBXML_ICU_ENABLED
2756 if ((handler_in_list == 0) &&
2757 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2758 tofree = 1;
2759 if (handler->uconv_out != NULL) {
2760 closeIcuConverter(handler->uconv_out);
2761 handler->uconv_out = NULL;
2762 }
2763 if (handler->uconv_in != NULL) {
2764 closeIcuConverter(handler->uconv_in);
2765 handler->uconv_in = NULL;
2766 }
2767 }
2768 #endif
2769 if (tofree) {
2770 /* free up only dynamic handlers iconv/uconv */
2771 if (handler->name != NULL)
2772 xmlFree(handler->name);
2773 handler->name = NULL;
2774 xmlFree(handler);
2775 }
2776 #ifdef DEBUG_ENCODING
2777 if (ret)
2778 xmlGenericError(xmlGenericErrorContext,
2779 "failed to close the encoding handler\n");
2780 else
2781 xmlGenericError(xmlGenericErrorContext,
2782 "closed the encoding handler\n");
2783 #endif
2784
2785 return(ret);
2786 }
2787
2788 /**
2789 * xmlByteConsumed:
2790 * @ctxt: an XML parser context
2791 *
2792 * This function provides the current index of the parser relative
2793 * to the start of the current entity. This function is computed in
2794 * bytes from the beginning starting at zero and finishing at the
2795 * size in byte of the file if parsing a file. The function is
2796 * of constant cost if the input is UTF-8 but can be costly if run
2797 * on non-UTF-8 input.
2798 *
2799 * Returns the index in bytes from the beginning of the entity or -1
2800 * in case the index could not be computed.
2801 */
2802 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2803 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2804 xmlParserInputPtr in;
2805
2806 if (ctxt == NULL) return(-1);
2807 in = ctxt->input;
2808 if (in == NULL) return(-1);
2809 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2810 unsigned int unused = 0;
2811 xmlCharEncodingHandler * handler = in->buf->encoder;
2812 /*
2813 * Encoding conversion, compute the number of unused original
2814 * bytes from the input not consumed and substract that from
2815 * the raw consumed value, this is not a cheap operation
2816 */
2817 if (in->end - in->cur > 0) {
2818 unsigned char convbuf[32000];
2819 const unsigned char *cur = (const unsigned char *)in->cur;
2820 int toconv = in->end - in->cur, written = 32000;
2821
2822 int ret;
2823
2824 do {
2825 toconv = in->end - cur;
2826 written = 32000;
2827 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2828 cur, &toconv);
2829 if (ret < 0) {
2830 if (written > 0)
2831 ret = -2;
2832 else
2833 return(-1);
2834 }
2835 unused += written;
2836 cur += toconv;
2837 } while (ret == -2);
2838 }
2839 if (in->buf->rawconsumed < unused)
2840 return(-1);
2841 return(in->buf->rawconsumed - unused);
2842 }
2843 return(in->consumed + (in->cur - in->base));
2844 }
2845
2846 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2847 #ifdef LIBXML_ISO8859X_ENABLED
2848
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Never writes more than @outlen bytes to @out.
 *
 * @xlattable layout as used here: entries 0..31 are indexed by the low
 * 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a 3-byte
 * lead, and 64-entry blocks follow from offset 48. A final entry of 0
 * means the character is not in the target character set.
 *
 * Returns the number of bytes written to @out if success (0 for the
 *     initialization call with @in == NULL), or
 *     -1 if an argument is NULL, or if @out is full while input remains
 *     -2 if the transcoding fails (invalid UTF-8, or a character that
 *        is not in the target character set)
 *     -3 if the input stops in the middle of a multi-byte sequence
 * Except when an argument is NULL:
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /* input remains but there is no room for one more byte */
            ret = -1;
            break;
        }
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        /* only now is the character counted as consumed */
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2967
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is NULL or an undefined byte is met
 * Except when an argument is NULL:
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Require room for a full 3-byte sequence. The test is written on
     * the remaining size so no pointer before @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than @out can still hold */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bounds check first: *in must not be read at or past instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* up to two spare output bytes can still take plain ASCII */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3033
3034
3035 /************************************************************************
3036 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3037 ************************************************************************/
3038
/*
 * 128 code points, one per ISO-8859-2 byte in 0x80..0xFF, stored at
 * index (byte - 0x80), eight entries per row.
 * Presumably handed to ISO8859xToUTF8() as its unicodetable argument;
 * that function treats an entry of 0 as an undefined code point.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3057
/*
 * Two-level lookup table of 48 + 6 * 64 bytes, written as string
 * literal rows of 16 bytes each (the array has no room for, and does
 * not store, a terminating '\0').
 * Presumably handed to UTF8ToISO8859x() as its xlattable argument. That
 * function reads the first 48 bytes as block selectors (0..31 for
 * 2-byte UTF-8 leads, 32..47 for 3-byte leads) and the rest as 64-byte
 * blocks, where a final value of 0 means "not in character set".
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3087
/*
 * xmlunicodetable_ISO8859_3:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-3 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * Entries of 0x0000 presumably flag bytes that have no assigned
 * character.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing and the 0x0000 convention there.
 */
3088 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3089 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3090 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3091 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3092 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3093 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3094 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3095 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3096 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3097 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3098 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3099 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3100 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3101 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3102 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3103 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3104 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3105 };
3106
/*
 * xmltranscodetable_ISO8859_3:
 *
 * Byte table of 48 + 7 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_3 and presumably used for the reverse
 * (Unicode to ISO 8859-3 byte) direction. The data is consistent with a
 * 48-byte index followed by 7 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-3 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3107 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3108 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3116 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3117 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3118 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3119 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3121 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3122 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3135 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3136 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3137 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3138 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3139 };
3140
/*
 * xmlunicodetable_ISO8859_4:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-4 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing there.
 */
3141 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3142 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3143 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3144 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3145 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3146 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3147 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3148 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3149 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3150 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3151 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3152 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3153 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3154 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3155 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3156 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3157 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3158 };
3159
/*
 * xmltranscodetable_ISO8859_4:
 *
 * Byte table of 48 + 6 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_4 and presumably used for the reverse
 * (Unicode to ISO 8859-4 byte) direction. The data is consistent with a
 * 48-byte index followed by 6 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-4 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3160 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3161 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3162 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3169 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3170 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3171 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3172 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3173 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3174 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3175 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3176 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3177 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3178 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3179 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3180 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3181 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3182 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3185 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3186 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3187 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3188 };
3189
/*
 * xmlunicodetable_ISO8859_5:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-5 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f,
 * most of the rest falls in 0x0401..0x045f).
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing there.
 */
3190 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3191 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3192 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3193 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3194 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3195 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3196 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3197 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3198 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3199 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3200 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3201 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3202 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3203 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3204 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3205 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3206 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3207 };
3208
/*
 * xmltranscodetable_ISO8859_5:
 *
 * Byte table of 48 + 6 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_5 and presumably used for the reverse
 * (Unicode to ISO 8859-5 byte) direction. The data is consistent with a
 * 48-byte index followed by 6 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-5 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3209 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3210 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3218 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3219 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3220 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3222 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3223 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3224 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3225 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3226 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 };
3238
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-6 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * Many entries are 0x0000, which presumably flags bytes that have no
 * assigned character.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing and the 0x0000 convention there.
 */
3239 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3240 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3241 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3242 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3243 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3244 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3245 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3246 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3247 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3248 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3249 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3250 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3251 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3253 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3254 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3255 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3256 };
3257
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte table of 48 + 5 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_6 and presumably used for the reverse
 * (Unicode to ISO 8859-6 byte) direction. The data is consistent with a
 * 48-byte index followed by 5 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-6 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3258 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3259 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3261 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3267 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3268 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3269 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3276 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3277 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3278 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3279 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282 };
3283
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-7 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * Entries of 0x0000 presumably flag bytes that have no assigned
 * character.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing and the 0x0000 convention there.
 */
3284 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3285 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3286 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3287 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3288 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3289 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3290 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3291 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3292 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3293 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3294 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3295 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3296 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3297 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3298 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3299 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3300 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3301 };
3302
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte table of 48 + 7 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_7 and presumably used for the reverse
 * (Unicode to ISO 8859-7 byte) direction. The data is consistent with a
 * 48-byte index followed by 7 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-7 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3303 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3304 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3305 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3312 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3313 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3314 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3315 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3328 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3329 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3330 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3331 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 };
3336
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-8 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * Entries of 0x0000 presumably flag bytes that have no assigned
 * character.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing and the 0x0000 convention there.
 */
3337 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3338 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3339 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3340 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3341 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3342 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3343 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3344 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3345 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3346 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3347 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3348 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3349 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3350 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3351 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3352 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3353 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3354 };
3355
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte table of 48 + 7 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_8 and presumably used for the reverse
 * (Unicode to ISO 8859-8 byte) direction. The data is consistent with a
 * 48-byte index followed by 7 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-8 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3356 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3357 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3365 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3366 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3367 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3368 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3381 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3386 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3387 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388 };
3389
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-9 byte
 * 0x80 + i. All but six entries equal 0x0080 + i; the exceptions are
 * 0x011e, 0x0130, 0x015e, 0x011f, 0x0131 and 0x015f.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing there.
 */
3390 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3391 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3392 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3393 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3394 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3395 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3396 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3397 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3398 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3399 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3400 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3401 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3402 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3403 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3404 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3405 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3406 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3407 };
3408
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte table of 48 + 5 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_9 and presumably used for the reverse
 * (Unicode to ISO 8859-9 byte) direction. The data is consistent with a
 * 48-byte index followed by 5 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is the final ISO 8859-9 byte;
 * block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3409 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3410 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3411 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3412 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3418 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3419 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3420 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3421 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3422 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3423 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3424 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3425 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3427 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 };
3434
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-10 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing there.
 */
3435 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3436 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3437 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3438 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3439 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3440 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3441 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3442 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3443 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3444 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3445 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3446 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3447 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3448 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3449 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3450 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3451 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3452 };
3453
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte table of 48 + 7 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_10 and presumably used for the reverse
 * (Unicode to ISO 8859-10 byte) direction. The data is consistent with a
 * 48-byte index followed by 7 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-10 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3454 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3455 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3460 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3461 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3462 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3463 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3464 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3465 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3467 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3468 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3469 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3470 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3473 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3474 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3483 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3484 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3485 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3486 };
3487
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-11 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f,
 * most of the rest falls in 0x0e01..0x0e5b).
 * Entries of 0x0000 presumably flag bytes that have no assigned
 * character.
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing and the 0x0000 convention there.
 */
3488 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3489 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3490 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3491 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3492 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3493 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3494 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3495 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3496 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3497 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3498 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3499 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3500 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3501 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3502 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3503 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3504 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3505 };
3506
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte table of 48 + 6 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_11 and presumably used for the reverse
 * (Unicode to ISO 8859-11 byte) direction. The data is consistent with a
 * 48-byte index followed by 6 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-11 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3507 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3508 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3509 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3510 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3511 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3512 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3513 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3516 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3517 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3523 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3524 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3525 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3526 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3527 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3529 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3531 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3532 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3533 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 };
3536
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128-entry table of 16-bit values; going by the name and the data,
 * entry i looks like the Unicode code point for ISO 8859-13 byte
 * 0x80 + i (the first 32 entries are the identity range 0x0080..0x009f).
 * NOTE(review): the code that reads this table is outside this chunk --
 * confirm the indexing there.
 */
3537 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3538 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3539 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3540 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3541 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3542 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3543 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3544 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3545 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3546 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3547 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3548 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3549 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3550 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3551 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3552 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3553 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3554 };
3555
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table of 48 + 7 * 64 entries, paired by name with
 * xmlunicodetable_ISO8859_13 and presumably used for the reverse
 * (Unicode to ISO 8859-13 byte) direction. The data is consistent with a
 * 48-byte index followed by 7 blocks of 64 bytes each, where an index
 * byte picks a block and a block entry is either another block number or
 * the final ISO 8859-13 byte; block 0 is all zeros.
 * NOTE(review): the lookup routine is outside this chunk -- confirm this
 * layout there before changing any byte.
 *
 * The string literal supplies exactly as many characters as the array
 * holds, so no terminating NUL is stored.
 */
3556 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3557 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3565 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3566 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3567 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3568 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3577 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3578 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3580 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3582 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3583 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3584 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3585 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3586 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3587 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3588 };
3589
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_14ToUTF8.  The first 32 entries are the identity values
 * 0x0080 .. 0x009f.
 *
 * NOTE(review): entry i is presumably the Unicode code point for
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3608
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_14.
 * The array is declared with 48 + 10 * 64 bytes and the string-literal
 * initializer below supplies exactly that many (43 rows of 16 bytes),
 * so no terminating NUL is stored.
 *
 * NOTE(review): the layout looks like a 48-byte index header followed by
 * ten 64-byte pages, with 0x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3654
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_15ToUTF8.  Most entries equal 0x80 + their index; the eight
 * exceptions are 0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153
 * and 0x0178.
 *
 * NOTE(review): entry i is presumably the Unicode code point for
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3673
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * The array is declared with 48 + 6 * 64 bytes and the string-literal
 * initializer below supplies exactly that many (27 rows of 16 bytes),
 * so no terminating NUL is stored.
 *
 * NOTE(review): the layout looks like a 48-byte index header followed by
 * six 64-byte pages, with 0x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3703
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8.  The first 32 entries are the identity values
 * 0x0080 .. 0x009f.
 *
 * NOTE(review): entry i is presumably the Unicode code point for
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3722
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * The array is declared with 48 + 9 * 64 bytes and the string-literal
 * initializer below supplies exactly that many (39 rows of 16 bytes),
 * so no terminating NUL is stored.
 *
 * NOTE(review): the layout looks like a 48-byte index header followed by
 * nine 64-byte pages, with 0x00 meaning "no mapping" -- confirm against
 * UTF8ToISO8859x before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3764
3765
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 * (no handler is defined for ISO-8859-12)
 */
3769
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3770 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3771 const unsigned char* in, int *inlen) {
3772 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3773 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3774 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3775 const unsigned char* in, int *inlen) {
3776 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3777 }
3778
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3779 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3780 const unsigned char* in, int *inlen) {
3781 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3782 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3783 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3784 const unsigned char* in, int *inlen) {
3785 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3786 }
3787
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3788 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3789 const unsigned char* in, int *inlen) {
3790 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3791 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3792 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3793 const unsigned char* in, int *inlen) {
3794 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3795 }
3796
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3797 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3798 const unsigned char* in, int *inlen) {
3799 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3800 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3801 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3802 const unsigned char* in, int *inlen) {
3803 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3804 }
3805
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3806 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3807 const unsigned char* in, int *inlen) {
3808 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3809 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3810 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3811 const unsigned char* in, int *inlen) {
3812 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3813 }
3814
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3815 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3816 const unsigned char* in, int *inlen) {
3817 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3818 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3819 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3820 const unsigned char* in, int *inlen) {
3821 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3822 }
3823
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3824 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3825 const unsigned char* in, int *inlen) {
3826 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3827 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3828 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3829 const unsigned char* in, int *inlen) {
3830 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3831 }
3832
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3833 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3834 const unsigned char* in, int *inlen) {
3835 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3836 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3837 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3838 const unsigned char* in, int *inlen) {
3839 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3840 }
3841
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3842 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3843 const unsigned char* in, int *inlen) {
3844 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3845 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3846 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3847 const unsigned char* in, int *inlen) {
3848 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3849 }
3850
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3851 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3852 const unsigned char* in, int *inlen) {
3853 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3854 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3855 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3856 const unsigned char* in, int *inlen) {
3857 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3858 }
3859
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3860 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3861 const unsigned char* in, int *inlen) {
3862 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3863 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3864 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3865 const unsigned char* in, int *inlen) {
3866 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3867 }
3868
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3869 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3870 const unsigned char* in, int *inlen) {
3871 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3872 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3873 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3874 const unsigned char* in, int *inlen) {
3875 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3876 }
3877
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3878 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3879 const unsigned char* in, int *inlen) {
3880 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3881 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3882 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3883 const unsigned char* in, int *inlen) {
3884 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3885 }
3886
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3887 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3888 const unsigned char* in, int *inlen) {
3889 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3890 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3891 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3892 const unsigned char* in, int *inlen) {
3893 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3894 }
3895
3896 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3897 xmlRegisterCharEncodingHandlersISO8859x (void) {
3898 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3899 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3900 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3901 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3902 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3903 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3904 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3905 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3906 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3907 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3908 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3909 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3910 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3911 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3912 }
3913
3914 #endif
3915 #endif
3916
3917 #define bottom_encoding
3918 #include "elfgcchack.h"
3919