1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23 #define IN_LIBXML
24 #include "libxml.h"
25
26 #include <string.h>
27 #include <limits.h>
28
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47
48 #include "buf.h"
49 #include "enc.h"
50
/*
 * File-scope slots for UTF-16 encoding handlers.
 * NOTE(review): presumably filled in when the default handlers are
 * registered elsewhere in this file -- confirm.
 */
static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

/*
 * One entry of the encoding alias table: @alias maps to encoding @name.
 * Both strings are owned by the table (allocated with xmlMemStrdup and
 * released with xmlFree by the alias functions below).
 */
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
struct _xmlCharEncodingAlias {
    const char *name;	/* encoding name the alias resolves to */
    const char *alias;	/* alias; stored uppercased by xmlAddEncodingAlias */
};

/* Dynamically grown alias table, its used entry count and its capacity. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

/*
 * With iconv or ICU available only the (disabled) debug switch is set up
 * here; otherwise the built-in ISO-8859-x registration routine is
 * forward-declared when that support is compiled in.
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif

/*
 * Byte-order flag: non-zero means the host is treated as little-endian.
 * NOTE(review): initialised to 1 here; presumably corrected at runtime
 * elsewhere in the file -- confirm.
 */
static int xmlLittleEndian = 1;
76
77 /**
78 * xmlEncodingErrMemory:
79 * @extra: extra information
80 *
81 * Handle an out of memory condition
82 */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88
89 /**
90 * xmlErrEncoding:
91 * @error: the error number
92 * @msg: the error message
93 *
94 * n encoding error
95 */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100 XML_FROM_I18N, error, XML_ERR_FATAL,
101 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108 UErrorCode status = U_ZERO_ERROR;
109 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110 if (conv == NULL)
111 return NULL;
112
113 conv->pivot_source = conv->pivot_buf;
114 conv->pivot_target = conv->pivot_buf;
115
116 conv->uconv = ucnv_open(name, &status);
117 if (U_FAILURE(status))
118 goto error;
119
120 status = U_ZERO_ERROR;
121 if (toUnicode) {
122 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123 NULL, NULL, NULL, &status);
124 }
125 else {
126 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127 NULL, NULL, NULL, &status);
128 }
129 if (U_FAILURE(status))
130 goto error;
131
132 status = U_ZERO_ERROR;
133 conv->utf8 = ucnv_open("UTF-8", &status);
134 if (U_SUCCESS(status))
135 return conv;
136
137 error:
138 if (conv->uconv)
139 ucnv_close(conv->uconv);
140 xmlFree(conv);
141 return NULL;
142 }
143
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147 if (conv != NULL) {
148 ucnv_close(conv->uconv);
149 ucnv_close(conv->utf8);
150 xmlFree(conv);
151 }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154
155 /************************************************************************
156 * *
157 * Conversions To/From UTF8 encoding *
158 * *
159 ************************************************************************/
160
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII is a strict subset of UTF-8, so every
 * input byte below 0x80 is copied unchanged and needs exactly one
 * output byte.
 *
 * Returns the number of bytes written if success, or -1 if a parameter
 *     is NULL or a byte >= 0x80 is met. A NULL @in is an initialization
 *     call: nothing is produced and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * On a non-ASCII byte both counts describe the data converted before
 * the offending byte.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *base = in;
    const unsigned char *inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * One output byte per input byte: keep going while there is both
     * input left and room left. No extra slack is reserved, so small
     * output buffers still make progress.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }

    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
205
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if a
 *     required parameter is NULL. A NULL @in is an initialization call
 *     and returns 0.
 * The value of @inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in Ascii */
            goto failed;
        }

        /* sequence split across input blocks: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A continuation byte must look like 10xxxxxx. Anything
             * else is malformed input and must not be turned into a
             * character built from a partial sequence.
             */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            goto failed;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report only the fully converted prefix */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Convert a block of ISO Latin 1 chars to UTF-8. Bytes below 0x80 are
 * copied as is; bytes from 0x80 up become a two byte sequence.
 * Conversion stops at the first character that no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if any
 *     parameter is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst;
    unsigned char *dstEnd;
    const unsigned char *src;
    const unsigned char *srcEnd;
    unsigned char ch;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dst = out;
    dstEnd = out + *outlen;
    src = in;
    srcEnd = in + (*inlen);

    while (src < srcEnd) {
        ch = *src;
        if (ch < 0x80) {
            /* plain ASCII, one byte of room needed */
            if (dst >= dstEnd)
                break;
            *dst++ = ch;
        } else {
            /* upper half of Latin 1, two bytes of room needed */
            if (dstEnd - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
340
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion" for UTF-8: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if a required parameter is
 *     NULL or the resulting length is negative. A NULL @inb is an
 *     initialization call and returns 0.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy whichever is shorter: the input or the room available */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(count);
}
387
388
389 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character above 0xFF), or -1 if a
 *     required parameter is NULL. A NULL @in is an initialization call
 *     and returns 0.
 * The value of @inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;
    const unsigned char *done;
    const unsigned char *last;
    unsigned char *dst;
    unsigned char *dstEnd;
    unsigned int value, byte;
    int extra;
    int status = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    done = in;
    last = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (cur < last) {
        byte = *cur++;

        /* classify the lead byte */
        if (byte < 0x80) {
            value = byte;
            extra = 0;
        } else if (byte < 0xC0) {
            /* trailing byte in leading position */
            status = -2;
            break;
        } else if (byte < 0xE0) {
            value = byte & 0x1F;
            extra = 1;
        } else if (byte < 0xF0) {
            value = byte & 0x0F;
            extra = 2;
        } else if (byte < 0xF8) {
            value = byte & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }

        /* the rest of the sequence has not arrived yet */
        if (last - cur < extra)
            break;

        /* fold in the continuation bytes */
        while (extra > 0) {
            byte = *cur++;
            if ((byte & 0xC0) != 0x80) {
                status = -2;
                break;
            }
            value = (value << 6) | (byte & 0x3F);
            extra--;
        }
        if (status != 0)
            break;

        /* assertion: value is a single UCS-4 value */
        if (value > 0xFF) {
            /* no chance for this in IsoLat1 */
            status = -2;
            break;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = value;
        done = cur;
    }

    *outlen = dst - out;
    *inlen = done - in;
    if (status != 0)
        return(status);
    return(*outlen);
}
479 #endif /* LIBXML_OUTPUT_ENABLED */
480
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result depends neither on the alignment of @inb nor on the byte
 * order of the host.
 *
 * A character is only written when it fits completely in @out. A high
 * surrogate at the very end of the input is left unconsumed so that
 * the caller can supply the second half later. An odd trailing byte is
 * ignored.
 *
 * Returns the number of bytes written, or -1 if a required parameter is
 *     NULL, or -2 if the transcoding fails (a high surrogate is not
 *     followed by a low surrogate). A NULL @inb is an initialization
 *     call and returns 0.
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;
    const unsigned char *inend;
    const unsigned char *next;
    unsigned int c, d;
    int need, shift;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (next >= inend) {
                /* pair split across input blocks: wait for the rest */
                break;
            }
            d = (unsigned int) next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else {
            shift = (need - 1) * 6;
            /* lead byte: 0xC0, 0xE0 or 0xF0 depending on the length */
            *out++ = (unsigned char)
                     ((c >> shift) | (0xF0 & (0xF00 >> need)));
            while (shift > 0) {
                shift -= 6;
                *out++ = (unsigned char) (((c >> shift) & 0x3F) | 0x80);
            }
        }
        in = next;
    }

    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
573
574 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result depends neither on the alignment
 * of @outb nor on the byte order of the host.
 *
 * Returns the number of bytes written, or -1 if a required parameter is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     that UTF-16 cannot represent). A NULL @in is an initialization
 *     call and returns 0.
 * The value of *@inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = out + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* sequence split across input blocks: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* continuation bytes must look like 10xxxxxx */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c & 0xFF);
            out[1] = (unsigned char) (c >> 8);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high & 0xFF);
            out[1] = (unsigned char) (high >> 8);
            out[2] = (unsigned char) (low & 0xFF);
            out[3] = (unsigned char) (low >> 8);
            out += 4;
        } else {
            /* beyond the range UTF-16 can encode */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report only the fully converted prefix */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
683
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. The data itself is converted by UTF8ToUTF16LE.
 * A NULL @in is the initialization call: the bytes 0xFF 0xFE (the
 * little-endian Byte Order Mark) are written if @outb has room for
 * them, otherwise nothing is written.
 *
 * Returns the number of bytes written, or whatever UTF8ToUTF16LE
 *     returns for the data.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* regular call: hand the data to the little-endian converter */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        /* no room for the mark */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
	    "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
722 #endif /* LIBXML_OUTPUT_ENABLED */
723
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is decoded byte by byte, so the
 * result depends neither on the alignment of @inb nor on the byte
 * order of the host.
 *
 * A character is only written when it fits completely in @out, so the
 * output never ends in a truncated UTF-8 sequence. A high surrogate at
 * the very end of the input is left unconsumed so that the caller can
 * supply the second half later. An odd trailing byte is ignored.
 *
 * Returns the number of bytes written, or -1 if a required parameter is
 *     NULL, or -2 if the transcoding fails (a high surrogate is not
 *     followed by a low surrogate). A NULL @inb is an initialization
 *     call and returns 0.
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;
    const unsigned char *inend;
    const unsigned char *next;
    unsigned int c, d;
    int need, shift;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;

    while (in < inend) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (next >= inend) {
                /* pair split across input blocks: wait for the rest */
                break;
            }
            d = ((unsigned int) next[0] << 8) | (unsigned int) next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80)
            need = 1;
        else if (c < 0x800)
            need = 2;
        else if (c < 0x10000)
            need = 3;
        else
            need = 4;

        /*
         * Check the room for the whole character up front: stopping in
         * the middle would leave a broken sequence in the output while
         * the input would already count as consumed.
         */
        if (outend - out < need)
            break;

        if (need == 1) {
            *out++ = (unsigned char) c;
        } else {
            shift = (need - 1) * 6;
            /* lead byte: 0xC0, 0xE0 or 0xF0 depending on the length */
            *out++ = (unsigned char)
                     ((c >> shift) | (0xF0 & (0xF00 >> need)));
            while (shift > 0) {
                shift -= 6;
                *out++ = (unsigned char) (((c >> shift) & 0x3F) | 0x80);
            }
        }
        in = next;
    }

    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
815
816 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * written byte by byte, so the result depends neither on the alignment
 * of @outb nor on the byte order of the host.
 *
 * Returns the number of bytes written, or -1 if a required parameter is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8 or a value
 *     that UTF-16 cannot represent). A NULL @in is an initialization
 *     call and returns 0.
 * The value of *@inlen after return is the number of octets consumed;
 * an incomplete sequence at the end of @in is left unconsumed.
 * The value of *@outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    unsigned int high, low;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = out + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto failed;
        }

        /* sequence split across input blocks: wait for the rest */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* continuation bytes must look like 10xxxxxx */
            if ((d & 0xC0) != 0x80)
                goto failed;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            out[0] = (unsigned char) (c >> 8);
            out[1] = (unsigned char) (c & 0xFF);
            out += 2;
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            high = 0xD800 | (c >> 10);
            low = 0xDC00 | (c & 0x03FF);
            out[0] = (unsigned char) (high >> 8);
            out[1] = (unsigned char) (high & 0xFF);
            out[2] = (unsigned char) (low >> 8);
            out[3] = (unsigned char) (low & 0xFF);
            out += 4;
        } else {
            /* beyond the range UTF-16 can encode */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /* report only the fully converted prefix, counted in bytes */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
922 #endif /* LIBXML_OUTPUT_ENABLED */
923
924 /************************************************************************
925 * *
926 * Generic encoding handling routines *
927 * *
928 ************************************************************************/
929
930 /**
931 * xmlDetectCharEncoding:
932 * @in: a pointer to the first bytes of the XML entity, must be at least
933 * 2 bytes long (at least 4 if encoding is UTF4 variant).
934 * @len: pointer to the length of the buffer
935 *
936 * Guess the encoding of the entity using the first bytes of the entity content
937 * according to the non-normative appendix F of the XML-1.0 recommendation.
938 *
939 * Returns one of the XML_CHAR_ENCODING_... values.
940 */
941 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)942 xmlDetectCharEncoding(const unsigned char* in, int len)
943 {
944 if (in == NULL)
945 return(XML_CHAR_ENCODING_NONE);
946 if (len >= 4) {
947 if ((in[0] == 0x00) && (in[1] == 0x00) &&
948 (in[2] == 0x00) && (in[3] == 0x3C))
949 return(XML_CHAR_ENCODING_UCS4BE);
950 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
951 (in[2] == 0x00) && (in[3] == 0x00))
952 return(XML_CHAR_ENCODING_UCS4LE);
953 if ((in[0] == 0x00) && (in[1] == 0x00) &&
954 (in[2] == 0x3C) && (in[3] == 0x00))
955 return(XML_CHAR_ENCODING_UCS4_2143);
956 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
957 (in[2] == 0x00) && (in[3] == 0x00))
958 return(XML_CHAR_ENCODING_UCS4_3412);
959 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
960 (in[2] == 0xA7) && (in[3] == 0x94))
961 return(XML_CHAR_ENCODING_EBCDIC);
962 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
963 (in[2] == 0x78) && (in[3] == 0x6D))
964 return(XML_CHAR_ENCODING_UTF8);
965 /*
966 * Although not part of the recommendation, we also
967 * attempt an "auto-recognition" of UTF-16LE and
968 * UTF-16BE encodings.
969 */
970 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
971 (in[2] == 0x3F) && (in[3] == 0x00))
972 return(XML_CHAR_ENCODING_UTF16LE);
973 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
974 (in[2] == 0x00) && (in[3] == 0x3F))
975 return(XML_CHAR_ENCODING_UTF16BE);
976 }
977 if (len >= 3) {
978 /*
979 * Errata on XML-1.0 June 20 2001
980 * We now allow an UTF8 encoded BOM
981 */
982 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
983 (in[2] == 0xBF))
984 return(XML_CHAR_ENCODING_UTF8);
985 }
986 /* For UTF-16 we can recognize by the BOM */
987 if (len >= 2) {
988 if ((in[0] == 0xFE) && (in[1] == 0xFF))
989 return(XML_CHAR_ENCODING_UTF16BE);
990 if ((in[0] == 0xFF) && (in[1] == 0xFE))
991 return(XML_CHAR_ENCODING_UTF16LE);
992 }
993 return(XML_CHAR_ENCODING_NONE);
994 }
995
996 /**
997 * xmlCleanupEncodingAliases:
998 *
999 * Unregisters all aliases
1000 */
1001 void
xmlCleanupEncodingAliases(void)1002 xmlCleanupEncodingAliases(void) {
1003 int i;
1004
1005 if (xmlCharEncodingAliases == NULL)
1006 return;
1007
1008 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1009 if (xmlCharEncodingAliases[i].name != NULL)
1010 xmlFree((char *) xmlCharEncodingAliases[i].name);
1011 if (xmlCharEncodingAliases[i].alias != NULL)
1012 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1013 }
1014 xmlCharEncodingAliasesNb = 0;
1015 xmlCharEncodingAliasesMax = 0;
1016 xmlFree(xmlCharEncodingAliases);
1017 xmlCharEncodingAliases = NULL;
1018 }
1019
1020 /**
1021 * xmlGetEncodingAlias:
1022 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1023 *
1024 * Lookup an encoding name for the given alias.
1025 *
1026 * Returns NULL if not found, otherwise the original name
1027 */
1028 const char *
xmlGetEncodingAlias(const char * alias)1029 xmlGetEncodingAlias(const char *alias) {
1030 int i;
1031 char upper[100];
1032
1033 if (alias == NULL)
1034 return(NULL);
1035
1036 if (xmlCharEncodingAliases == NULL)
1037 return(NULL);
1038
1039 for (i = 0;i < 99;i++) {
1040 upper[i] = toupper(alias[i]);
1041 if (upper[i] == 0) break;
1042 }
1043 upper[i] = 0;
1044
1045 /*
1046 * Walk down the list looking for a definition of the alias
1047 */
1048 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1049 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1050 return(xmlCharEncodingAliases[i].name);
1051 }
1052 }
1053 return(NULL);
1054 }
1055
1056 /**
1057 * xmlAddEncodingAlias:
1058 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1059 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1060 *
1061 * Registers an alias @alias for an encoding named @name. Existing alias
1062 * will be overwritten.
1063 *
1064 * Returns 0 in case of success, -1 in case of error
1065 */
1066 int
xmlAddEncodingAlias(const char * name,const char * alias)1067 xmlAddEncodingAlias(const char *name, const char *alias) {
1068 int i;
1069 char upper[100];
1070
1071 if ((name == NULL) || (alias == NULL))
1072 return(-1);
1073
1074 for (i = 0;i < 99;i++) {
1075 upper[i] = toupper(alias[i]);
1076 if (upper[i] == 0) break;
1077 }
1078 upper[i] = 0;
1079
1080 if (xmlCharEncodingAliases == NULL) {
1081 xmlCharEncodingAliasesNb = 0;
1082 xmlCharEncodingAliasesMax = 20;
1083 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1084 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1085 if (xmlCharEncodingAliases == NULL)
1086 return(-1);
1087 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1088 xmlCharEncodingAliasesMax *= 2;
1089 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1090 xmlRealloc(xmlCharEncodingAliases,
1091 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1092 }
1093 /*
1094 * Walk down the list looking for a definition of the alias
1095 */
1096 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1097 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1098 /*
1099 * Replace the definition.
1100 */
1101 xmlFree((char *) xmlCharEncodingAliases[i].name);
1102 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1103 return(0);
1104 }
1105 }
1106 /*
1107 * Add the definition
1108 */
1109 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1110 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1111 xmlCharEncodingAliasesNb++;
1112 return(0);
1113 }
1114
1115 /**
1116 * xmlDelEncodingAlias:
1117 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1118 *
1119 * Unregisters an encoding alias @alias
1120 *
1121 * Returns 0 in case of success, -1 in case of error
1122 */
1123 int
xmlDelEncodingAlias(const char * alias)1124 xmlDelEncodingAlias(const char *alias) {
1125 int i;
1126
1127 if (alias == NULL)
1128 return(-1);
1129
1130 if (xmlCharEncodingAliases == NULL)
1131 return(-1);
1132 /*
1133 * Walk down the list looking for a definition of the alias
1134 */
1135 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1136 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1137 xmlFree((char *) xmlCharEncodingAliases[i].name);
1138 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1139 xmlCharEncodingAliasesNb--;
1140 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1141 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1142 return(0);
1143 }
1144 }
1145 return(-1);
1146 }
1147
1148 /**
1149 * xmlParseCharEncoding:
1150 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1151 *
1152 * Compare the string to the encoding schemes already known. Note
1153 * that the comparison is case insensitive accordingly to the section
1154 * [XML] 4.3.3 Character Encoding in Entities.
1155 *
1156 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1157 * if not recognized.
1158 */
1159 xmlCharEncoding
xmlParseCharEncoding(const char * name)1160 xmlParseCharEncoding(const char* name)
1161 {
1162 const char *alias;
1163 char upper[500];
1164 int i;
1165
1166 if (name == NULL)
1167 return(XML_CHAR_ENCODING_NONE);
1168
1169 /*
1170 * Do the alias resolution
1171 */
1172 alias = xmlGetEncodingAlias(name);
1173 if (alias != NULL)
1174 name = alias;
1175
1176 for (i = 0;i < 499;i++) {
1177 upper[i] = toupper(name[i]);
1178 if (upper[i] == 0) break;
1179 }
1180 upper[i] = 0;
1181
1182 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1183 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1184 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1185
1186 /*
1187 * NOTE: if we were able to parse this, the endianness of UTF16 is
1188 * already found and in use
1189 */
1190 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1191 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1192
1193 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1194 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1196
1197 /*
1198 * NOTE: if we were able to parse this, the endianness of UCS4 is
1199 * already found and in use
1200 */
1201 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1202 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1204
1205
1206 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1207 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1208 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1209
1210 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1211 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1212 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1213
1214 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1215 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1216 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1217 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1218 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1219 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1220 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1221
1222 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1223 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1224 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1225
1226 #ifdef DEBUG_ENCODING
1227 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1228 #endif
1229 return(XML_CHAR_ENCODING_ERROR);
1230 }
1231
1232 /**
1233 * xmlGetCharEncodingName:
1234 * @enc: the encoding
1235 *
1236 * The "canonical" name for XML encoding.
1237 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1238 * Section 4.3.3 Character Encoding in Entities
1239 *
1240 * Returns the canonical name for the given encoding
1241 */
1242
1243 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1244 xmlGetCharEncodingName(xmlCharEncoding enc) {
1245 switch (enc) {
1246 case XML_CHAR_ENCODING_ERROR:
1247 return(NULL);
1248 case XML_CHAR_ENCODING_NONE:
1249 return(NULL);
1250 case XML_CHAR_ENCODING_UTF8:
1251 return("UTF-8");
1252 case XML_CHAR_ENCODING_UTF16LE:
1253 return("UTF-16");
1254 case XML_CHAR_ENCODING_UTF16BE:
1255 return("UTF-16");
1256 case XML_CHAR_ENCODING_EBCDIC:
1257 return("EBCDIC");
1258 case XML_CHAR_ENCODING_UCS4LE:
1259 return("ISO-10646-UCS-4");
1260 case XML_CHAR_ENCODING_UCS4BE:
1261 return("ISO-10646-UCS-4");
1262 case XML_CHAR_ENCODING_UCS4_2143:
1263 return("ISO-10646-UCS-4");
1264 case XML_CHAR_ENCODING_UCS4_3412:
1265 return("ISO-10646-UCS-4");
1266 case XML_CHAR_ENCODING_UCS2:
1267 return("ISO-10646-UCS-2");
1268 case XML_CHAR_ENCODING_8859_1:
1269 return("ISO-8859-1");
1270 case XML_CHAR_ENCODING_8859_2:
1271 return("ISO-8859-2");
1272 case XML_CHAR_ENCODING_8859_3:
1273 return("ISO-8859-3");
1274 case XML_CHAR_ENCODING_8859_4:
1275 return("ISO-8859-4");
1276 case XML_CHAR_ENCODING_8859_5:
1277 return("ISO-8859-5");
1278 case XML_CHAR_ENCODING_8859_6:
1279 return("ISO-8859-6");
1280 case XML_CHAR_ENCODING_8859_7:
1281 return("ISO-8859-7");
1282 case XML_CHAR_ENCODING_8859_8:
1283 return("ISO-8859-8");
1284 case XML_CHAR_ENCODING_8859_9:
1285 return("ISO-8859-9");
1286 case XML_CHAR_ENCODING_2022_JP:
1287 return("ISO-2022-JP");
1288 case XML_CHAR_ENCODING_SHIFT_JIS:
1289 return("Shift-JIS");
1290 case XML_CHAR_ENCODING_EUC_JP:
1291 return("EUC-JP");
1292 case XML_CHAR_ENCODING_ASCII:
1293 return(NULL);
1294 }
1295 return(NULL);
1296 }
1297
1298 /************************************************************************
1299 * *
1300 * Char encoding handlers *
1301 * *
1302 ************************************************************************/
1303
1304
/*
 * Registry of encoding handlers: a fixed-capacity array of
 * MAX_ENCODING_HANDLERS slots allocated by xmlInitCharEncodingHandlers()
 * and filled by xmlRegisterCharEncodingHandler(); nbCharEncodingHandler
 * is the number of slots currently in use.
 * The size should be growable, but it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1316
1317 /**
1318 * xmlNewCharEncodingHandler:
1319 * @name: the encoding name, in UTF-8 format (ASCII actually)
1320 * @input: the xmlCharEncodingInputFunc to read that encoding
1321 * @output: the xmlCharEncodingOutputFunc to write that encoding
1322 *
1323 * Create and registers an xmlCharEncodingHandler.
1324 *
1325 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1326 */
1327 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1328 xmlNewCharEncodingHandler(const char *name,
1329 xmlCharEncodingInputFunc input,
1330 xmlCharEncodingOutputFunc output) {
1331 xmlCharEncodingHandlerPtr handler;
1332 const char *alias;
1333 char upper[500];
1334 int i;
1335 char *up = NULL;
1336
1337 /*
1338 * Do the alias resolution
1339 */
1340 alias = xmlGetEncodingAlias(name);
1341 if (alias != NULL)
1342 name = alias;
1343
1344 /*
1345 * Keep only the uppercase version of the encoding.
1346 */
1347 if (name == NULL) {
1348 xmlEncodingErr(XML_I18N_NO_NAME,
1349 "xmlNewCharEncodingHandler : no name !\n", NULL);
1350 return(NULL);
1351 }
1352 for (i = 0;i < 499;i++) {
1353 upper[i] = toupper(name[i]);
1354 if (upper[i] == 0) break;
1355 }
1356 upper[i] = 0;
1357 up = xmlMemStrdup(upper);
1358 if (up == NULL) {
1359 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1360 return(NULL);
1361 }
1362
1363 /*
1364 * allocate and fill-up an handler block.
1365 */
1366 handler = (xmlCharEncodingHandlerPtr)
1367 xmlMalloc(sizeof(xmlCharEncodingHandler));
1368 if (handler == NULL) {
1369 xmlFree(up);
1370 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1371 return(NULL);
1372 }
1373 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1374 handler->input = input;
1375 handler->output = output;
1376 handler->name = up;
1377
1378 #ifdef LIBXML_ICONV_ENABLED
1379 handler->iconv_in = NULL;
1380 handler->iconv_out = NULL;
1381 #endif
1382 #ifdef LIBXML_ICU_ENABLED
1383 handler->uconv_in = NULL;
1384 handler->uconv_out = NULL;
1385 #endif
1386
1387 /*
1388 * registers and returns the handler.
1389 */
1390 xmlRegisterCharEncodingHandler(handler);
1391 #ifdef DEBUG_ENCODING
1392 xmlGenericError(xmlGenericErrorContext,
1393 "Registered encoding handler for %s\n", name);
1394 #endif
1395 return(handler);
1396 }
1397
1398 /**
1399 * xmlInitCharEncodingHandlers:
1400 *
1401 * Initialize the char encoding support, it registers the default
1402 * encoding supported.
1403 * NOTE: while public, this function usually doesn't need to be called
1404 * in normal processing.
1405 */
1406 void
xmlInitCharEncodingHandlers(void)1407 xmlInitCharEncodingHandlers(void) {
1408 unsigned short int tst = 0x1234;
1409 unsigned char *ptr = (unsigned char *) &tst;
1410
1411 if (handlers != NULL) return;
1412
1413 handlers = (xmlCharEncodingHandlerPtr *)
1414 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1415
1416 if (*ptr == 0x12) xmlLittleEndian = 0;
1417 else if (*ptr == 0x34) xmlLittleEndian = 1;
1418 else {
1419 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1420 "Odd problem at endianness detection\n", NULL);
1421 }
1422
1423 if (handlers == NULL) {
1424 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1425 return;
1426 }
1427 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1428 #ifdef LIBXML_OUTPUT_ENABLED
1429 xmlUTF16LEHandler =
1430 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1431 xmlUTF16BEHandler =
1432 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1433 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1434 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1435 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1436 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1437 #ifdef LIBXML_HTML_ENABLED
1438 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1439 #endif
1440 #else
1441 xmlUTF16LEHandler =
1442 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1443 xmlUTF16BEHandler =
1444 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1445 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1446 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1447 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1448 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1449 #endif /* LIBXML_OUTPUT_ENABLED */
1450 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1451 #ifdef LIBXML_ISO8859X_ENABLED
1452 xmlRegisterCharEncodingHandlersISO8859x ();
1453 #endif
1454 #endif
1455
1456 }
1457
1458 /**
1459 * xmlCleanupCharEncodingHandlers:
1460 *
1461 * Cleanup the memory allocated for the char encoding support, it
1462 * unregisters all the encoding handlers and the aliases.
1463 */
1464 void
xmlCleanupCharEncodingHandlers(void)1465 xmlCleanupCharEncodingHandlers(void) {
1466 xmlCleanupEncodingAliases();
1467
1468 if (handlers == NULL) return;
1469
1470 for (;nbCharEncodingHandler > 0;) {
1471 nbCharEncodingHandler--;
1472 if (handlers[nbCharEncodingHandler] != NULL) {
1473 if (handlers[nbCharEncodingHandler]->name != NULL)
1474 xmlFree(handlers[nbCharEncodingHandler]->name);
1475 xmlFree(handlers[nbCharEncodingHandler]);
1476 }
1477 }
1478 xmlFree(handlers);
1479 handlers = NULL;
1480 nbCharEncodingHandler = 0;
1481 xmlDefaultCharEncodingHandler = NULL;
1482 }
1483
1484 /**
1485 * xmlRegisterCharEncodingHandler:
1486 * @handler: the xmlCharEncodingHandlerPtr handler block
1487 *
1488 * Register the char encoding handler, surprising, isn't it ?
1489 */
1490 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1491 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1492 if (handlers == NULL) xmlInitCharEncodingHandlers();
1493 if ((handler == NULL) || (handlers == NULL)) {
1494 xmlEncodingErr(XML_I18N_NO_HANDLER,
1495 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1496 goto free_handler;
1497 }
1498
1499 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1500 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1501 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1502 "MAX_ENCODING_HANDLERS");
1503 goto free_handler;
1504 }
1505 handlers[nbCharEncodingHandler++] = handler;
1506 return;
1507
1508 free_handler:
1509 if (handler != NULL) {
1510 if (handler->name != NULL) {
1511 xmlFree(handler->name);
1512 }
1513 xmlFree(handler);
1514 }
1515 }
1516
1517 /**
1518 * xmlGetCharEncodingHandler:
1519 * @enc: an xmlCharEncoding value.
1520 *
1521 * Search in the registered set the handler able to read/write that encoding.
1522 *
1523 * Returns the handler or NULL if not found
1524 */
1525 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1526 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1527 xmlCharEncodingHandlerPtr handler;
1528
1529 if (handlers == NULL) xmlInitCharEncodingHandlers();
1530 switch (enc) {
1531 case XML_CHAR_ENCODING_ERROR:
1532 return(NULL);
1533 case XML_CHAR_ENCODING_NONE:
1534 return(NULL);
1535 case XML_CHAR_ENCODING_UTF8:
1536 return(NULL);
1537 case XML_CHAR_ENCODING_UTF16LE:
1538 return(xmlUTF16LEHandler);
1539 case XML_CHAR_ENCODING_UTF16BE:
1540 return(xmlUTF16BEHandler);
1541 case XML_CHAR_ENCODING_EBCDIC:
1542 handler = xmlFindCharEncodingHandler("EBCDIC");
1543 if (handler != NULL) return(handler);
1544 handler = xmlFindCharEncodingHandler("ebcdic");
1545 if (handler != NULL) return(handler);
1546 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1547 if (handler != NULL) return(handler);
1548 handler = xmlFindCharEncodingHandler("IBM-037");
1549 if (handler != NULL) return(handler);
1550 break;
1551 case XML_CHAR_ENCODING_UCS4BE:
1552 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1553 if (handler != NULL) return(handler);
1554 handler = xmlFindCharEncodingHandler("UCS-4");
1555 if (handler != NULL) return(handler);
1556 handler = xmlFindCharEncodingHandler("UCS4");
1557 if (handler != NULL) return(handler);
1558 break;
1559 case XML_CHAR_ENCODING_UCS4LE:
1560 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1561 if (handler != NULL) return(handler);
1562 handler = xmlFindCharEncodingHandler("UCS-4");
1563 if (handler != NULL) return(handler);
1564 handler = xmlFindCharEncodingHandler("UCS4");
1565 if (handler != NULL) return(handler);
1566 break;
1567 case XML_CHAR_ENCODING_UCS4_2143:
1568 break;
1569 case XML_CHAR_ENCODING_UCS4_3412:
1570 break;
1571 case XML_CHAR_ENCODING_UCS2:
1572 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1573 if (handler != NULL) return(handler);
1574 handler = xmlFindCharEncodingHandler("UCS-2");
1575 if (handler != NULL) return(handler);
1576 handler = xmlFindCharEncodingHandler("UCS2");
1577 if (handler != NULL) return(handler);
1578 break;
1579
1580 /*
1581 * We used to keep ISO Latin encodings native in the
1582 * generated data. This led to so many problems that
1583 * this has been removed. One can still change this
1584 * back by registering no-ops encoders for those
1585 */
1586 case XML_CHAR_ENCODING_8859_1:
1587 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1588 if (handler != NULL) return(handler);
1589 break;
1590 case XML_CHAR_ENCODING_8859_2:
1591 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1592 if (handler != NULL) return(handler);
1593 break;
1594 case XML_CHAR_ENCODING_8859_3:
1595 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1596 if (handler != NULL) return(handler);
1597 break;
1598 case XML_CHAR_ENCODING_8859_4:
1599 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1600 if (handler != NULL) return(handler);
1601 break;
1602 case XML_CHAR_ENCODING_8859_5:
1603 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1604 if (handler != NULL) return(handler);
1605 break;
1606 case XML_CHAR_ENCODING_8859_6:
1607 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1608 if (handler != NULL) return(handler);
1609 break;
1610 case XML_CHAR_ENCODING_8859_7:
1611 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1612 if (handler != NULL) return(handler);
1613 break;
1614 case XML_CHAR_ENCODING_8859_8:
1615 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1616 if (handler != NULL) return(handler);
1617 break;
1618 case XML_CHAR_ENCODING_8859_9:
1619 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1620 if (handler != NULL) return(handler);
1621 break;
1622
1623
1624 case XML_CHAR_ENCODING_2022_JP:
1625 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1626 if (handler != NULL) return(handler);
1627 break;
1628 case XML_CHAR_ENCODING_SHIFT_JIS:
1629 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1630 if (handler != NULL) return(handler);
1631 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1632 if (handler != NULL) return(handler);
1633 handler = xmlFindCharEncodingHandler("Shift_JIS");
1634 if (handler != NULL) return(handler);
1635 break;
1636 case XML_CHAR_ENCODING_EUC_JP:
1637 handler = xmlFindCharEncodingHandler("EUC-JP");
1638 if (handler != NULL) return(handler);
1639 break;
1640 default:
1641 break;
1642 }
1643
1644 #ifdef DEBUG_ENCODING
1645 xmlGenericError(xmlGenericErrorContext,
1646 "No handler found for encoding %d\n", enc);
1647 #endif
1648 return(NULL);
1649 }
1650
1651 /**
1652 * xmlFindCharEncodingHandler:
1653 * @name: a string describing the char encoding.
1654 *
1655 * Search in the registered set the handler able to read/write that encoding.
1656 *
1657 * Returns the handler or NULL if not found
1658 */
1659 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1660 xmlFindCharEncodingHandler(const char *name) {
1661 const char *nalias;
1662 const char *norig;
1663 xmlCharEncoding alias;
1664 #ifdef LIBXML_ICONV_ENABLED
1665 xmlCharEncodingHandlerPtr enc;
1666 iconv_t icv_in, icv_out;
1667 #endif /* LIBXML_ICONV_ENABLED */
1668 #ifdef LIBXML_ICU_ENABLED
1669 xmlCharEncodingHandlerPtr encu;
1670 uconv_t *ucv_in, *ucv_out;
1671 #endif /* LIBXML_ICU_ENABLED */
1672 char upper[100];
1673 int i;
1674
1675 if (handlers == NULL) xmlInitCharEncodingHandlers();
1676 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1677 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1678
1679 /*
1680 * Do the alias resolution
1681 */
1682 norig = name;
1683 nalias = xmlGetEncodingAlias(name);
1684 if (nalias != NULL)
1685 name = nalias;
1686
1687 /*
1688 * Check first for directly registered encoding names
1689 */
1690 for (i = 0;i < 99;i++) {
1691 upper[i] = toupper(name[i]);
1692 if (upper[i] == 0) break;
1693 }
1694 upper[i] = 0;
1695
1696 if (handlers != NULL) {
1697 for (i = 0;i < nbCharEncodingHandler; i++) {
1698 if (!strcmp(upper, handlers[i]->name)) {
1699 #ifdef DEBUG_ENCODING
1700 xmlGenericError(xmlGenericErrorContext,
1701 "Found registered handler for encoding %s\n", name);
1702 #endif
1703 return(handlers[i]);
1704 }
1705 }
1706 }
1707
1708 #ifdef LIBXML_ICONV_ENABLED
1709 /* check whether iconv can handle this */
1710 icv_in = iconv_open("UTF-8", name);
1711 icv_out = iconv_open(name, "UTF-8");
1712 if (icv_in == (iconv_t) -1) {
1713 icv_in = iconv_open("UTF-8", upper);
1714 }
1715 if (icv_out == (iconv_t) -1) {
1716 icv_out = iconv_open(upper, "UTF-8");
1717 }
1718 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1719 enc = (xmlCharEncodingHandlerPtr)
1720 xmlMalloc(sizeof(xmlCharEncodingHandler));
1721 if (enc == NULL) {
1722 iconv_close(icv_in);
1723 iconv_close(icv_out);
1724 return(NULL);
1725 }
1726 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1727 enc->name = xmlMemStrdup(name);
1728 enc->input = NULL;
1729 enc->output = NULL;
1730 enc->iconv_in = icv_in;
1731 enc->iconv_out = icv_out;
1732 #ifdef DEBUG_ENCODING
1733 xmlGenericError(xmlGenericErrorContext,
1734 "Found iconv handler for encoding %s\n", name);
1735 #endif
1736 return enc;
1737 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1738 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1739 "iconv : problems with filters for '%s'\n", name);
1740 }
1741 #endif /* LIBXML_ICONV_ENABLED */
1742 #ifdef LIBXML_ICU_ENABLED
1743 /* check whether icu can handle this */
1744 ucv_in = openIcuConverter(name, 1);
1745 ucv_out = openIcuConverter(name, 0);
1746 if (ucv_in != NULL && ucv_out != NULL) {
1747 encu = (xmlCharEncodingHandlerPtr)
1748 xmlMalloc(sizeof(xmlCharEncodingHandler));
1749 if (encu == NULL) {
1750 closeIcuConverter(ucv_in);
1751 closeIcuConverter(ucv_out);
1752 return(NULL);
1753 }
1754 memset(encu, 0, sizeof(xmlCharEncodingHandler));
1755 encu->name = xmlMemStrdup(name);
1756 encu->input = NULL;
1757 encu->output = NULL;
1758 encu->uconv_in = ucv_in;
1759 encu->uconv_out = ucv_out;
1760 #ifdef DEBUG_ENCODING
1761 xmlGenericError(xmlGenericErrorContext,
1762 "Found ICU converter handler for encoding %s\n", name);
1763 #endif
1764 return encu;
1765 } else if (ucv_in != NULL || ucv_out != NULL) {
1766 closeIcuConverter(ucv_in);
1767 closeIcuConverter(ucv_out);
1768 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1769 "ICU converter : problems with filters for '%s'\n", name);
1770 }
1771 #endif /* LIBXML_ICU_ENABLED */
1772
1773 #ifdef DEBUG_ENCODING
1774 xmlGenericError(xmlGenericErrorContext,
1775 "No handler found for encoding %s\n", name);
1776 #endif
1777
1778 /*
1779 * Fallback using the canonical names
1780 */
1781 alias = xmlParseCharEncoding(norig);
1782 if (alias != XML_CHAR_ENCODING_ERROR) {
1783 const char* canon;
1784 canon = xmlGetCharEncodingName(alias);
1785 if ((canon != NULL) && (strcmp(name, canon))) {
1786 return(xmlFindCharEncodingHandler(canon));
1787 }
1788 }
1789
1790 /* If "none of the above", give up */
1791 return(NULL);
1792 }
1793
1794 /************************************************************************
1795 * *
1796 * ICONV based generic conversion functions *
1797 * *
1798 ************************************************************************/
1799
1800 #ifdef LIBXML_ICONV_ENABLED
1801 /**
1802 * xmlIconvWrapper:
1803 * @cd: iconv converter data structure
1804 * @out: a pointer to an array of bytes to store the result
1805 * @outlen: the length of @out
1806 * @in: a pointer to an array of input bytes
1807 * @inlen: the length of @in
1808 *
1809 * Returns 0 if success, or
1810 * -1 by lack of space, or
1811 * -2 if the transcoding fails (for *in is not valid utf8 string or
1812 * the result of transformation can't fit into the encoding we want), or
1813 * -3 if there the last byte can't form a single output char.
1814 *
1815 * The value of @inlen after return is the number of octets consumed
1816 * as the return value is positive, else unpredictable.
1817 * The value of @outlen after return is the number of octets produced.
1818 */
1819 static int
xmlIconvWrapper(iconv_t cd,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1820 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1821 const unsigned char *in, int *inlen) {
1822 size_t icv_inlen, icv_outlen;
1823 const char *icv_in = (const char *) in;
1824 char *icv_out = (char *) out;
1825 int ret;
1826
1827 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1828 if (outlen != NULL) *outlen = 0;
1829 return(-1);
1830 }
1831 icv_inlen = *inlen;
1832 icv_outlen = *outlen;
1833 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1834 *inlen -= icv_inlen;
1835 *outlen -= icv_outlen;
1836 if ((icv_inlen != 0) || (ret == -1)) {
1837 #ifdef EILSEQ
1838 if (errno == EILSEQ) {
1839 return -2;
1840 } else
1841 #endif
1842 #ifdef E2BIG
1843 if (errno == E2BIG) {
1844 return -1;
1845 } else
1846 #endif
1847 #ifdef EINVAL
1848 if (errno == EINVAL) {
1849 return -3;
1850 } else
1851 #endif
1852 {
1853 return -3;
1854 }
1855 }
1856 return 0;
1857 }
1858 #endif /* LIBXML_ICONV_ENABLED */
1859
1860 /************************************************************************
1861 * *
1862 * ICU based generic conversion functions *
1863 * *
1864 ************************************************************************/
1865
1866 #ifdef LIBXML_ICU_ENABLED
1867 /**
1868 * xmlUconvWrapper:
1869 * @cd: ICU uconverter data structure
1870 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1871 * @out: a pointer to an array of bytes to store the result
1872 * @outlen: the length of @out
1873 * @in: a pointer to an array of input bytes
1874 * @inlen: the length of @in
1875 * @flush: if true, indicates end of input
1876 *
1877 * Returns 0 if success, or
1878 * -1 by lack of space, or
1879 * -2 if the transcoding fails (for *in is not valid utf8 string or
1880 * the result of transformation can't fit into the encoding we want), or
1881 * -3 if there the last byte can't form a single output char.
1882 *
1883 * The value of @inlen after return is the number of octets consumed
1884 * as the return value is positive, else unpredictable.
1885 * The value of @outlen after return is the number of octets produced.
1886 */
1887 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1888 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1889 const unsigned char *in, int *inlen, int flush) {
1890 const char *ucv_in = (const char *) in;
1891 char *ucv_out = (char *) out;
1892 UErrorCode err = U_ZERO_ERROR;
1893
1894 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1895 if (outlen != NULL) *outlen = 0;
1896 return(-1);
1897 }
1898
1899 if (toUnicode) {
1900 /* encoding => UTF-16 => UTF-8 */
1901 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1902 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1903 &cd->pivot_source, &cd->pivot_target,
1904 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1905 } else {
1906 /* UTF-8 => UTF-16 => encoding */
1907 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1908 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1909 &cd->pivot_source, &cd->pivot_target,
1910 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1911 }
1912 *inlen = ucv_in - (const char*) in;
1913 *outlen = ucv_out - (char *) out;
1914 if (U_SUCCESS(err)) {
1915 /* reset pivot buf if this is the last call for input (flush==TRUE) */
1916 if (flush)
1917 cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1918 return 0;
1919 }
1920 if (err == U_BUFFER_OVERFLOW_ERROR)
1921 return -1;
1922 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1923 return -2;
1924 return -3;
1925 }
1926 #endif /* LIBXML_ICU_ENABLED */
1927
1928 /************************************************************************
1929 * *
1930 * The real API used by libxml for on-the-fly conversion *
1931 * *
1932 ************************************************************************/
1933
1934 /**
1935 * xmlEncInputChunk:
1936 * @handler: encoding handler
1937 * @out: a pointer to an array of bytes to store the result
1938 * @outlen: the length of @out
1939 * @in: a pointer to an array of input bytes
1940 * @inlen: the length of @in
1941 * @flush: flush (ICU-related)
1942 *
1943 * Returns 0 if success, or
1944 * -1 by lack of space, or
1945 * -2 if the transcoding fails (for *in is not valid utf8 string or
1946 * the result of transformation can't fit into the encoding we want), or
1947 * -3 if there the last byte can't form a single output char.
1948 *
1949 * The value of @inlen after return is the number of octets consumed
1950 * as the return value is 0, else unpredictable.
1951 * The value of @outlen after return is the number of octets produced.
1952 */
1953 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1954 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1955 int *outlen, const unsigned char *in, int *inlen, int flush) {
1956 int ret;
1957 (void)flush;
1958
1959 if (handler->input != NULL) {
1960 ret = handler->input(out, outlen, in, inlen);
1961 if (ret > 0)
1962 ret = 0;
1963 }
1964 #ifdef LIBXML_ICONV_ENABLED
1965 else if (handler->iconv_in != NULL) {
1966 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1967 }
1968 #endif /* LIBXML_ICONV_ENABLED */
1969 #ifdef LIBXML_ICU_ENABLED
1970 else if (handler->uconv_in != NULL) {
1971 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1972 flush);
1973 }
1974 #endif /* LIBXML_ICU_ENABLED */
1975 else {
1976 *outlen = 0;
1977 *inlen = 0;
1978 ret = -2;
1979 }
1980
1981 return(ret);
1982 }
1983
1984 /**
1985 * xmlEncOutputChunk:
1986 * @handler: encoding handler
1987 * @out: a pointer to an array of bytes to store the result
1988 * @outlen: the length of @out
1989 * @in: a pointer to an array of input bytes
1990 * @inlen: the length of @in
1991 *
1992 * Returns 0 if success, or
1993 * -1 by lack of space, or
1994 * -2 if the transcoding fails (for *in is not valid utf8 string or
1995 * the result of transformation can't fit into the encoding we want), or
1996 * -3 if there the last byte can't form a single output char.
1997 * -4 if no output function was found.
1998 *
1999 * The value of @inlen after return is the number of octets consumed
2000 * as the return value is 0, else unpredictable.
2001 * The value of @outlen after return is the number of octets produced.
2002 */
2003 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2004 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2005 int *outlen, const unsigned char *in, int *inlen) {
2006 int ret;
2007
2008 if (handler->output != NULL) {
2009 ret = handler->output(out, outlen, in, inlen);
2010 if (ret > 0)
2011 ret = 0;
2012 }
2013 #ifdef LIBXML_ICONV_ENABLED
2014 else if (handler->iconv_out != NULL) {
2015 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2016 }
2017 #endif /* LIBXML_ICONV_ENABLED */
2018 #ifdef LIBXML_ICU_ENABLED
2019 else if (handler->uconv_out != NULL) {
2020 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2021 1);
2022 }
2023 #endif /* LIBXML_ICU_ENABLED */
2024 else {
2025 *outlen = 0;
2026 *inlen = 0;
2027 ret = -4;
2028 }
2029
2030 return(ret);
2031 }
2032
2033 /**
2034 * xmlCharEncFirstLineInt:
2035 * @handler: char encoding transformation data structure
2036 * @out: an xmlBuffer for the output.
2037 * @in: an xmlBuffer for the input
2038 * @len: number of bytes to convert for the first line, or -1
2039 *
2040 * Front-end for the encoding handler input function, but handle only
2041 * the very first line, i.e. limit itself to 45 chars.
2042 *
2043 * Returns the number of byte written if success, or
2044 * -1 general error
2045 * -2 if the transcoding fails (for *in is not valid utf8 string or
2046 * the result of transformation can't fit into the encoding we want), or
2047 */
2048 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2049 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2050 xmlBufferPtr in, int len) {
2051 int ret;
2052 int written;
2053 int toconv;
2054
2055 if (handler == NULL) return(-1);
2056 if (out == NULL) return(-1);
2057 if (in == NULL) return(-1);
2058
2059 /* calculate space available */
2060 written = out->size - out->use - 1; /* count '\0' */
2061 toconv = in->use;
2062 /*
2063 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2064 * 45 chars should be sufficient to reach the end of the encoding
2065 * declaration without going too far inside the document content.
2066 * on UTF-16 this means 90bytes, on UCS4 this means 180
2067 * The actual value depending on guessed encoding is passed as @len
2068 * if provided
2069 */
2070 if (len >= 0) {
2071 if (toconv > len)
2072 toconv = len;
2073 } else {
2074 if (toconv > 180)
2075 toconv = 180;
2076 }
2077 if (toconv * 2 >= written) {
2078 xmlBufferGrow(out, toconv * 2);
2079 written = out->size - out->use - 1;
2080 }
2081
2082 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2083 in->content, &toconv, 0);
2084 xmlBufferShrink(in, toconv);
2085 out->use += written;
2086 out->content[out->use] = 0;
2087 if (ret == -1) ret = -3;
2088
2089 #ifdef DEBUG_ENCODING
2090 switch (ret) {
2091 case 0:
2092 xmlGenericError(xmlGenericErrorContext,
2093 "converted %d bytes to %d bytes of input\n",
2094 toconv, written);
2095 break;
2096 case -1:
2097 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2098 toconv, written, in->use);
2099 break;
2100 case -2:
2101 xmlGenericError(xmlGenericErrorContext,
2102 "input conversion failed due to input error\n");
2103 break;
2104 case -3:
2105 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2106 toconv, written, in->use);
2107 break;
2108 default:
2109 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2110 }
2111 #endif /* DEBUG_ENCODING */
2112 /*
2113 * Ignore when input buffer is not on a boundary
2114 */
2115 if (ret == -3) ret = 0;
2116 if (ret == -1) ret = 0;
2117 return(written ? written : ret);
2118 }
2119
2120 /**
2121 * xmlCharEncFirstLine:
2122 * @handler: char encoding transformation data structure
2123 * @out: an xmlBuffer for the output.
2124 * @in: an xmlBuffer for the input
2125 *
2126 * Front-end for the encoding handler input function, but handle only
2127 * the very first line, i.e. limit itself to 45 chars.
2128 *
2129 * Returns the number of byte written if success, or
2130 * -1 general error
2131 * -2 if the transcoding fails (for *in is not valid utf8 string or
2132 * the result of transformation can't fit into the encoding we want), or
2133 */
2134 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2135 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2136 xmlBufferPtr in) {
2137 return(xmlCharEncFirstLineInt(handler, out, in, -1));
2138 }
2139
2140 /**
2141 * xmlCharEncFirstLineInput:
2142 * @input: a parser input buffer
2143 * @len: number of bytes to convert for the first line, or -1
2144 *
2145 * Front-end for the encoding handler input function, but handle only
2146 * the very first line. Point is that this is based on autodetection
2147 * of the encoding and once that first line is converted we may find
2148 * out that a different decoder is needed to process the input.
2149 *
2150 * Returns the number of byte written if success, or
2151 * -1 general error
2152 * -2 if the transcoding fails (for *in is not valid utf8 string or
2153 * the result of transformation can't fit into the encoding we want), or
2154 */
2155 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2156 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2157 {
2158 int ret;
2159 size_t written;
2160 size_t toconv;
2161 int c_in;
2162 int c_out;
2163 xmlBufPtr in;
2164 xmlBufPtr out;
2165
2166 if ((input == NULL) || (input->encoder == NULL) ||
2167 (input->buffer == NULL) || (input->raw == NULL))
2168 return (-1);
2169 out = input->buffer;
2170 in = input->raw;
2171
2172 toconv = xmlBufUse(in);
2173 if (toconv == 0)
2174 return (0);
2175 written = xmlBufAvail(out) - 1; /* count '\0' */
2176 /*
2177 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2178 * 45 chars should be sufficient to reach the end of the encoding
2179 * declaration without going too far inside the document content.
2180 * on UTF-16 this means 90bytes, on UCS4 this means 180
2181 * The actual value depending on guessed encoding is passed as @len
2182 * if provided
2183 */
2184 if (len >= 0) {
2185 if (toconv > (unsigned int) len)
2186 toconv = len;
2187 } else {
2188 if (toconv > 180)
2189 toconv = 180;
2190 }
2191 if (toconv * 2 >= written) {
2192 xmlBufGrow(out, toconv * 2);
2193 written = xmlBufAvail(out) - 1;
2194 }
2195 if (written > 360)
2196 written = 360;
2197
2198 c_in = toconv;
2199 c_out = written;
2200 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2201 xmlBufContent(in), &c_in, 0);
2202 xmlBufShrink(in, c_in);
2203 xmlBufAddLen(out, c_out);
2204 if (ret == -1)
2205 ret = -3;
2206
2207 switch (ret) {
2208 case 0:
2209 #ifdef DEBUG_ENCODING
2210 xmlGenericError(xmlGenericErrorContext,
2211 "converted %d bytes to %d bytes of input\n",
2212 c_in, c_out);
2213 #endif
2214 break;
2215 case -1:
2216 #ifdef DEBUG_ENCODING
2217 xmlGenericError(xmlGenericErrorContext,
2218 "converted %d bytes to %d bytes of input, %d left\n",
2219 c_in, c_out, (int)xmlBufUse(in));
2220 #endif
2221 break;
2222 case -3:
2223 #ifdef DEBUG_ENCODING
2224 xmlGenericError(xmlGenericErrorContext,
2225 "converted %d bytes to %d bytes of input, %d left\n",
2226 c_in, c_out, (int)xmlBufUse(in));
2227 #endif
2228 break;
2229 case -2: {
2230 char buf[50];
2231 const xmlChar *content = xmlBufContent(in);
2232
2233 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2234 content[0], content[1],
2235 content[2], content[3]);
2236 buf[49] = 0;
2237 xmlEncodingErr(XML_I18N_CONV_FAILED,
2238 "input conversion failed due to input error, bytes %s\n",
2239 buf);
2240 }
2241 }
2242 /*
2243 * Ignore when input buffer is not on a boundary
2244 */
2245 if (ret == -3) ret = 0;
2246 if (ret == -1) ret = 0;
2247 return(c_out ? c_out : ret);
2248 }
2249
2250 /**
2251 * xmlCharEncInput:
2252 * @input: a parser input buffer
2253 * @flush: try to flush all the raw buffer
2254 *
2255 * Generic front-end for the encoding handler on parser input
2256 *
2257 * Returns the number of byte written if success, or
2258 * -1 general error
2259 * -2 if the transcoding fails (for *in is not valid utf8 string or
2260 * the result of transformation can't fit into the encoding we want), or
2261 */
2262 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2263 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2264 {
2265 int ret;
2266 size_t written;
2267 size_t toconv;
2268 int c_in;
2269 int c_out;
2270 xmlBufPtr in;
2271 xmlBufPtr out;
2272
2273 if ((input == NULL) || (input->encoder == NULL) ||
2274 (input->buffer == NULL) || (input->raw == NULL))
2275 return (-1);
2276 out = input->buffer;
2277 in = input->raw;
2278
2279 toconv = xmlBufUse(in);
2280 if (toconv == 0)
2281 return (0);
2282 if ((toconv > 64 * 1024) && (flush == 0))
2283 toconv = 64 * 1024;
2284 written = xmlBufAvail(out);
2285 if (written > 0)
2286 written--; /* count '\0' */
2287 if (toconv * 2 >= written) {
2288 xmlBufGrow(out, toconv * 2);
2289 written = xmlBufAvail(out);
2290 if (written > 0)
2291 written--; /* count '\0' */
2292 }
2293 if ((written > 128 * 1024) && (flush == 0))
2294 written = 128 * 1024;
2295
2296 c_in = toconv;
2297 c_out = written;
2298 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2299 xmlBufContent(in), &c_in, flush);
2300 xmlBufShrink(in, c_in);
2301 xmlBufAddLen(out, c_out);
2302 if (ret == -1)
2303 ret = -3;
2304
2305 switch (ret) {
2306 case 0:
2307 #ifdef DEBUG_ENCODING
2308 xmlGenericError(xmlGenericErrorContext,
2309 "converted %d bytes to %d bytes of input\n",
2310 c_in, c_out);
2311 #endif
2312 break;
2313 case -1:
2314 #ifdef DEBUG_ENCODING
2315 xmlGenericError(xmlGenericErrorContext,
2316 "converted %d bytes to %d bytes of input, %d left\n",
2317 c_in, c_out, (int)xmlBufUse(in));
2318 #endif
2319 break;
2320 case -3:
2321 #ifdef DEBUG_ENCODING
2322 xmlGenericError(xmlGenericErrorContext,
2323 "converted %d bytes to %d bytes of input, %d left\n",
2324 c_in, c_out, (int)xmlBufUse(in));
2325 #endif
2326 break;
2327 case -2: {
2328 char buf[50];
2329 const xmlChar *content = xmlBufContent(in);
2330
2331 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2332 content[0], content[1],
2333 content[2], content[3]);
2334 buf[49] = 0;
2335 xmlEncodingErr(XML_I18N_CONV_FAILED,
2336 "input conversion failed due to input error, bytes %s\n",
2337 buf);
2338 }
2339 }
2340 /*
2341 * Ignore when input buffer is not on a boundary
2342 */
2343 if (ret == -3)
2344 ret = 0;
2345 return (c_out? c_out : ret);
2346 }
2347
2348 /**
2349 * xmlCharEncInFunc:
2350 * @handler: char encoding transformation data structure
2351 * @out: an xmlBuffer for the output.
2352 * @in: an xmlBuffer for the input
2353 *
2354 * Generic front-end for the encoding handler input function
2355 *
2356 * Returns the number of byte written if success, or
2357 * -1 general error
2358 * -2 if the transcoding fails (for *in is not valid utf8 string or
2359 * the result of transformation can't fit into the encoding we want), or
2360 */
2361 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2362 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2363 xmlBufferPtr in)
2364 {
2365 int ret;
2366 int written;
2367 int toconv;
2368
2369 if (handler == NULL)
2370 return (-1);
2371 if (out == NULL)
2372 return (-1);
2373 if (in == NULL)
2374 return (-1);
2375
2376 toconv = in->use;
2377 if (toconv == 0)
2378 return (0);
2379 written = out->size - out->use -1; /* count '\0' */
2380 if (toconv * 2 >= written) {
2381 xmlBufferGrow(out, out->size + toconv * 2);
2382 written = out->size - out->use - 1;
2383 }
2384 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2385 in->content, &toconv, 1);
2386 xmlBufferShrink(in, toconv);
2387 out->use += written;
2388 out->content[out->use] = 0;
2389 if (ret == -1)
2390 ret = -3;
2391
2392 switch (ret) {
2393 case 0:
2394 #ifdef DEBUG_ENCODING
2395 xmlGenericError(xmlGenericErrorContext,
2396 "converted %d bytes to %d bytes of input\n",
2397 toconv, written);
2398 #endif
2399 break;
2400 case -1:
2401 #ifdef DEBUG_ENCODING
2402 xmlGenericError(xmlGenericErrorContext,
2403 "converted %d bytes to %d bytes of input, %d left\n",
2404 toconv, written, in->use);
2405 #endif
2406 break;
2407 case -3:
2408 #ifdef DEBUG_ENCODING
2409 xmlGenericError(xmlGenericErrorContext,
2410 "converted %d bytes to %d bytes of input, %d left\n",
2411 toconv, written, in->use);
2412 #endif
2413 break;
2414 case -2: {
2415 char buf[50];
2416
2417 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2418 in->content[0], in->content[1],
2419 in->content[2], in->content[3]);
2420 buf[49] = 0;
2421 xmlEncodingErr(XML_I18N_CONV_FAILED,
2422 "input conversion failed due to input error, bytes %s\n",
2423 buf);
2424 }
2425 }
2426 /*
2427 * Ignore when input buffer is not on a boundary
2428 */
2429 if (ret == -3)
2430 ret = 0;
2431 return (written? written : ret);
2432 }
2433
2434 #ifdef LIBXML_OUTPUT_ENABLED
2435 /**
2436 * xmlCharEncOutput:
2437 * @output: a parser output buffer
2438 * @init: is this an initialization call without data
2439 *
2440 * Generic front-end for the encoding handler on parser output
2441 * a first call with @init == 1 has to be made first to initiate the
2442 * output in case of non-stateless encoding needing to initiate their
2443 * state or the output (like the BOM in UTF16).
2444 * In case of UTF8 sequence conversion errors for the given encoder,
2445 * the content will be automatically remapped to a CharRef sequence.
2446 *
2447 * Returns the number of byte written if success, or
2448 * -1 general error
2449 * -2 if the transcoding fails (for *in is not valid utf8 string or
2450 * the result of transformation can't fit into the encoding we want), or
2451 */
2452 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2453 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2454 {
2455 int ret;
2456 size_t written;
2457 int writtentot = 0;
2458 size_t toconv;
2459 int c_in;
2460 int c_out;
2461 xmlBufPtr in;
2462 xmlBufPtr out;
2463
2464 if ((output == NULL) || (output->encoder == NULL) ||
2465 (output->buffer == NULL) || (output->conv == NULL))
2466 return (-1);
2467 out = output->conv;
2468 in = output->buffer;
2469
2470 retry:
2471
2472 written = xmlBufAvail(out);
2473 if (written > 0)
2474 written--; /* count '\0' */
2475
2476 /*
2477 * First specific handling of the initialization call
2478 */
2479 if (init) {
2480 c_in = 0;
2481 c_out = written;
2482 /* TODO: Check return value. */
2483 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2484 NULL, &c_in);
2485 xmlBufAddLen(out, c_out);
2486 #ifdef DEBUG_ENCODING
2487 xmlGenericError(xmlGenericErrorContext,
2488 "initialized encoder\n");
2489 #endif
2490 return(c_out);
2491 }
2492
2493 /*
2494 * Conversion itself.
2495 */
2496 toconv = xmlBufUse(in);
2497 if (toconv == 0)
2498 return (0);
2499 if (toconv > 64 * 1024)
2500 toconv = 64 * 1024;
2501 if (toconv * 4 >= written) {
2502 xmlBufGrow(out, toconv * 4);
2503 written = xmlBufAvail(out) - 1;
2504 }
2505 if (written > 256 * 1024)
2506 written = 256 * 1024;
2507
2508 c_in = toconv;
2509 c_out = written;
2510 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2511 xmlBufContent(in), &c_in);
2512 xmlBufShrink(in, c_in);
2513 xmlBufAddLen(out, c_out);
2514 writtentot += c_out;
2515 if (ret == -1) {
2516 if (c_out > 0) {
2517 /* Can be a limitation of iconv or uconv */
2518 goto retry;
2519 }
2520 ret = -3;
2521 }
2522
2523 /*
2524 * Attempt to handle error cases
2525 */
2526 switch (ret) {
2527 case 0:
2528 #ifdef DEBUG_ENCODING
2529 xmlGenericError(xmlGenericErrorContext,
2530 "converted %d bytes to %d bytes of output\n",
2531 c_in, c_out);
2532 #endif
2533 break;
2534 case -1:
2535 #ifdef DEBUG_ENCODING
2536 xmlGenericError(xmlGenericErrorContext,
2537 "output conversion failed by lack of space\n");
2538 #endif
2539 break;
2540 case -3:
2541 #ifdef DEBUG_ENCODING
2542 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2543 c_in, c_out, (int) xmlBufUse(in));
2544 #endif
2545 break;
2546 case -4:
2547 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2548 "xmlCharEncOutFunc: no output function !\n", NULL);
2549 ret = -1;
2550 break;
2551 case -2: {
2552 xmlChar charref[20];
2553 int len = (int) xmlBufUse(in);
2554 xmlChar *content = xmlBufContent(in);
2555 int cur, charrefLen;
2556
2557 cur = xmlGetUTF8Char(content, &len);
2558 if (cur <= 0)
2559 break;
2560
2561 #ifdef DEBUG_ENCODING
2562 xmlGenericError(xmlGenericErrorContext,
2563 "handling output conversion error\n");
2564 xmlGenericError(xmlGenericErrorContext,
2565 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2566 content[0], content[1],
2567 content[2], content[3]);
2568 #endif
2569 /*
2570 * Removes the UTF8 sequence, and replace it by a charref
2571 * and continue the transcoding phase, hoping the error
2572 * did not mangle the encoder state.
2573 */
2574 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2575 "&#%d;", cur);
2576 xmlBufShrink(in, len);
2577 xmlBufGrow(out, charrefLen * 4);
2578 c_out = xmlBufAvail(out) - 1;
2579 c_in = charrefLen;
2580 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2581 charref, &c_in);
2582
2583 if ((ret < 0) || (c_in != charrefLen)) {
2584 char buf[50];
2585
2586 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2587 content[0], content[1],
2588 content[2], content[3]);
2589 buf[49] = 0;
2590 xmlEncodingErr(XML_I18N_CONV_FAILED,
2591 "output conversion failed due to conv error, bytes %s\n",
2592 buf);
2593 if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2594 content[0] = ' ';
2595 break;
2596 }
2597
2598 xmlBufAddLen(out, c_out);
2599 writtentot += c_out;
2600 goto retry;
2601 }
2602 }
2603 return(writtentot ? writtentot : ret);
2604 }
2605 #endif
2606
2607 /**
2608 * xmlCharEncOutFunc:
2609 * @handler: char encoding transformation data structure
2610 * @out: an xmlBuffer for the output.
2611 * @in: an xmlBuffer for the input
2612 *
2613 * Generic front-end for the encoding handler output function
2614 * a first call with @in == NULL has to be made firs to initiate the
2615 * output in case of non-stateless encoding needing to initiate their
2616 * state or the output (like the BOM in UTF16).
2617 * In case of UTF8 sequence conversion errors for the given encoder,
2618 * the content will be automatically remapped to a CharRef sequence.
2619 *
2620 * Returns the number of byte written if success, or
2621 * -1 general error
2622 * -2 if the transcoding fails (for *in is not valid utf8 string or
2623 * the result of transformation can't fit into the encoding we want), or
2624 */
2625 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2626 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2627 xmlBufferPtr in) {
2628 int ret;
2629 int written;
2630 int writtentot = 0;
2631 int toconv;
2632 int output = 0;
2633
2634 if (handler == NULL) return(-1);
2635 if (out == NULL) return(-1);
2636
2637 retry:
2638
2639 written = out->size - out->use;
2640
2641 if (written > 0)
2642 written--; /* Gennady: count '/0' */
2643
2644 /*
2645 * First specific handling of in = NULL, i.e. the initialization call
2646 */
2647 if (in == NULL) {
2648 toconv = 0;
2649 /* TODO: Check return value. */
2650 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2651 NULL, &toconv);
2652 out->use += written;
2653 out->content[out->use] = 0;
2654 #ifdef DEBUG_ENCODING
2655 xmlGenericError(xmlGenericErrorContext,
2656 "initialized encoder\n");
2657 #endif
2658 return(0);
2659 }
2660
2661 /*
2662 * Conversion itself.
2663 */
2664 toconv = in->use;
2665 if (toconv == 0)
2666 return(0);
2667 if (toconv * 4 >= written) {
2668 xmlBufferGrow(out, toconv * 4);
2669 written = out->size - out->use - 1;
2670 }
2671 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2672 in->content, &toconv);
2673 xmlBufferShrink(in, toconv);
2674 out->use += written;
2675 writtentot += written;
2676 out->content[out->use] = 0;
2677 if (ret == -1) {
2678 if (written > 0) {
2679 /* Can be a limitation of iconv or uconv */
2680 goto retry;
2681 }
2682 ret = -3;
2683 }
2684
2685 if (ret >= 0) output += ret;
2686
2687 /*
2688 * Attempt to handle error cases
2689 */
2690 switch (ret) {
2691 case 0:
2692 #ifdef DEBUG_ENCODING
2693 xmlGenericError(xmlGenericErrorContext,
2694 "converted %d bytes to %d bytes of output\n",
2695 toconv, written);
2696 #endif
2697 break;
2698 case -1:
2699 #ifdef DEBUG_ENCODING
2700 xmlGenericError(xmlGenericErrorContext,
2701 "output conversion failed by lack of space\n");
2702 #endif
2703 break;
2704 case -3:
2705 #ifdef DEBUG_ENCODING
2706 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2707 toconv, written, in->use);
2708 #endif
2709 break;
2710 case -4:
2711 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2712 "xmlCharEncOutFunc: no output function !\n", NULL);
2713 ret = -1;
2714 break;
2715 case -2: {
2716 xmlChar charref[20];
2717 int len = in->use;
2718 const xmlChar *utf = (const xmlChar *) in->content;
2719 int cur, charrefLen;
2720
2721 cur = xmlGetUTF8Char(utf, &len);
2722 if (cur <= 0)
2723 break;
2724
2725 #ifdef DEBUG_ENCODING
2726 xmlGenericError(xmlGenericErrorContext,
2727 "handling output conversion error\n");
2728 xmlGenericError(xmlGenericErrorContext,
2729 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2730 in->content[0], in->content[1],
2731 in->content[2], in->content[3]);
2732 #endif
2733 /*
2734 * Removes the UTF8 sequence, and replace it by a charref
2735 * and continue the transcoding phase, hoping the error
2736 * did not mangle the encoder state.
2737 */
2738 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2739 "&#%d;", cur);
2740 xmlBufferShrink(in, len);
2741 xmlBufferGrow(out, charrefLen * 4);
2742 written = out->size - out->use - 1;
2743 toconv = charrefLen;
2744 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2745 charref, &toconv);
2746
2747 if ((ret < 0) || (toconv != charrefLen)) {
2748 char buf[50];
2749
2750 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2751 in->content[0], in->content[1],
2752 in->content[2], in->content[3]);
2753 buf[49] = 0;
2754 xmlEncodingErr(XML_I18N_CONV_FAILED,
2755 "output conversion failed due to conv error, bytes %s\n",
2756 buf);
2757 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2758 in->content[0] = ' ';
2759 break;
2760 }
2761
2762 out->use += written;
2763 writtentot += written;
2764 out->content[out->use] = 0;
2765 goto retry;
2766 }
2767 }
2768 return(writtentot ? writtentot : ret);
2769 }
2770
2771 /**
2772 * xmlCharEncCloseFunc:
2773 * @handler: char encoding transformation data structure
2774 *
2775 * Generic front-end for encoding handler close function
2776 *
2777 * Returns 0 if success, or -1 in case of error
2778 */
2779 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2780 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2781 int ret = 0;
2782 int tofree = 0;
2783 int i, handler_in_list = 0;
2784
2785 if (handler == NULL) return(-1);
2786 if (handler->name == NULL) return(-1);
2787 if (handlers != NULL) {
2788 for (i = 0;i < nbCharEncodingHandler; i++) {
2789 if (handler == handlers[i]) {
2790 handler_in_list = 1;
2791 break;
2792 }
2793 }
2794 }
2795 #ifdef LIBXML_ICONV_ENABLED
2796 /*
2797 * Iconv handlers can be used only once, free the whole block.
2798 * and the associated icon resources.
2799 */
2800 if ((handler_in_list == 0) &&
2801 ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2802 tofree = 1;
2803 if (handler->iconv_out != NULL) {
2804 if (iconv_close(handler->iconv_out))
2805 ret = -1;
2806 handler->iconv_out = NULL;
2807 }
2808 if (handler->iconv_in != NULL) {
2809 if (iconv_close(handler->iconv_in))
2810 ret = -1;
2811 handler->iconv_in = NULL;
2812 }
2813 }
2814 #endif /* LIBXML_ICONV_ENABLED */
2815 #ifdef LIBXML_ICU_ENABLED
2816 if ((handler_in_list == 0) &&
2817 ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2818 tofree = 1;
2819 if (handler->uconv_out != NULL) {
2820 closeIcuConverter(handler->uconv_out);
2821 handler->uconv_out = NULL;
2822 }
2823 if (handler->uconv_in != NULL) {
2824 closeIcuConverter(handler->uconv_in);
2825 handler->uconv_in = NULL;
2826 }
2827 }
2828 #endif
2829 if (tofree) {
2830 /* free up only dynamic handlers iconv/uconv */
2831 if (handler->name != NULL)
2832 xmlFree(handler->name);
2833 handler->name = NULL;
2834 xmlFree(handler);
2835 }
2836 #ifdef DEBUG_ENCODING
2837 if (ret)
2838 xmlGenericError(xmlGenericErrorContext,
2839 "failed to close the encoding handler\n");
2840 else
2841 xmlGenericError(xmlGenericErrorContext,
2842 "closed the encoding handler\n");
2843 #endif
2844
2845 return(ret);
2846 }
2847
2848 /**
2849 * xmlByteConsumed:
2850 * @ctxt: an XML parser context
2851 *
2852 * This function provides the current index of the parser relative
2853 * to the start of the current entity. This function is computed in
2854 * bytes from the beginning starting at zero and finishing at the
2855 * size in byte of the file if parsing a file. The function is
2856 * of constant cost if the input is UTF-8 but can be costly if run
2857 * on non-UTF-8 input.
2858 *
2859 * Returns the index in bytes from the beginning of the entity or -1
2860 * in case the index could not be computed.
2861 */
2862 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2863 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2864 xmlParserInputPtr in;
2865
2866 if (ctxt == NULL) return(-1);
2867 in = ctxt->input;
2868 if (in == NULL) return(-1);
2869 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2870 unsigned int unused = 0;
2871 xmlCharEncodingHandler * handler = in->buf->encoder;
2872 /*
2873 * Encoding conversion, compute the number of unused original
2874 * bytes from the input not consumed and subtract that from
2875 * the raw consumed value, this is not a cheap operation
2876 */
2877 if (in->end - in->cur > 0) {
2878 unsigned char convbuf[32000];
2879 const unsigned char *cur = (const unsigned char *)in->cur;
2880 int toconv = in->end - in->cur, written = 32000;
2881
2882 int ret;
2883
2884 do {
2885 toconv = in->end - cur;
2886 written = 32000;
2887 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2888 cur, &toconv);
2889 if (ret < 0) {
2890 if (written > 0)
2891 ret = -2;
2892 else
2893 return(-1);
2894 }
2895 unused += written;
2896 cur += toconv;
2897 } while (ret == -2);
2898 }
2899 if (in->buf->rawconsumed < unused)
2900 return(-1);
2901 return(in->buf->rawconsumed - unused);
2902 }
2903 return(in->consumed + (in->cur - in->base));
2904 }
2905
2906 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2907 #ifdef LIBXML_ISO8859X_ENABLED
2908
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.
 *
 * Layout of @xlattable as used here: entries 0..31 are indexed by the
 * low 5 bits of a 2-byte lead byte, entries 32..47 by the low 4 bits of
 * a 3-byte lead byte, and 64-byte blocks start at offset 48. A 0 found
 * in the last lookup means the character is not in the target set.
 *
 * Returns the number of bytes written if success,
 *     -1 if an argument is NULL or @out has no room for the next char,
 *     -2 if the transcoding fails (invalid UTF-8 or character not in
 *        the target character set),
 *     -3 if the input ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete characters only.
 * The value of @outlen after return is the number of octets produced.
 * If @in is NULL (initialization call) both are set to 0 and 0 is
 * returned.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* A negative size is treated as "no room at all". */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        /* Every input character yields exactly one output byte. */
        if (out >= outend) {
            ret = -1;
            goto done;
        }
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }
    ret = (int) (out - outstart);

done:
    /* Report only fully converted characters as consumed. */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(ret);
}
3027
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  code points for the input bytes 0x80..0xFF, indexed
 *                 by (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * is too small to safely hold the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an input byte has no entry in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * Neither is updated when -1 is returned for a NULL argument.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Keep at least 3 free bytes, the longest sequence produced below.
     * Written as a difference so that no pointer before @out is formed
     * when the output is shorter than 2 bytes.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Never copy more ASCII bytes than the output can take. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Check the bound first so *in is never read past the input. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two remaining ASCII bytes still fit in the reserved tail. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3093
3094
3095 /************************************************************************
3096 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3097 ************************************************************************/
3098
/*
 * Code points for the ISO-8859-2 bytes 0x80..0xFF, indexed by
 * (byte - 0x80). Each row below covers eight consecutive bytes.
 */
static const unsigned short xmlunicodetable_ISO8859_2 [128] = {
    /* 0x80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    /* 0x88 */ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    /* 0x90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    /* 0x98 */ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    /* 0xA0 */ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    /* 0xB0 */ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    /* 0xB8 */ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    /* 0xC0 */ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    /* 0xC8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    /* 0xE0 */ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    /* 0xE8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
};
3117
/*
 * xmltranscodetable_ISO8859_2: byte table of 48 + 6 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 6 blocks of 64 bytes (four rows each).
 * The first block is all zero; the non-zero bytes in the other blocks
 * are ISO-8859-2 byte values in the range 0x80..0xff.
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-2)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes,
 * with 0 meaning "no mapping" -- confirm against the encoder using it.
 */
3118 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3119 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3120 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3125 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3127 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3128 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3129 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3130 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3131 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3132 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3133 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3134 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3136 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3139 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3140 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3141 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3142 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3143 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3144 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3145 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3146 };
3147
/*
 * xmlunicodetable_ISO8859_3: 128 sixteen-bit entries for ISO-8859-3.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0. Seven entries are 0x0000 (indices 0x25, 0x2e, 0x3e,
 * 0x43, 0x50, 0x63 and 0x70).
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff, with 0x0000 marking bytes that have no
 * assigned character -- confirm against the decoder using it.
 */
3148 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3149 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3150 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3151 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3152 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3153 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3154 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3155 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3156 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3157 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3158 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3159 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3160 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3161 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3162 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3163 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3164 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3165 };
3166
/*
 * xmltranscodetable_ISO8859_3: byte table of 48 + 7 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 7 blocks of 64 bytes (four rows each).
 * The first block is all zero; the non-zero bytes in the other blocks
 * are ISO-8859-3 byte values in the range 0x80..0xff.
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-3)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes,
 * with 0 meaning "no mapping" -- confirm against the encoder using it.
 * NOTE(review): header entry 0 is 0x04, and the block at offset
 * 48 + 4 * 64 holds a single non-zero byte, 0xf0, at its first position.
 * xmlunicodetable_ISO8859_3 lists 0x0000 for byte 0xf0, so this looks
 * like a table-generation artifact for an unassigned slot rather than a
 * real mapping -- verify how the encoder treats it.
 */
3167 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3168 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3169 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3173 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3176 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3177 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3178 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3179 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3180 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3181 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3182 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3194 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3195 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3196 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3197 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3198 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3199 };
3200
/*
 * xmlunicodetable_ISO8859_4: 128 sixteen-bit entries for ISO-8859-4.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0 and contain no 0x0000 entry.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff -- confirm against the decoder using it.
 */
3201 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3202 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3203 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3204 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3205 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3206 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3207 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3208 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3209 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3210 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3211 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3212 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3213 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3214 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3215 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3216 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3217 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3218 };
3219
/*
 * xmltranscodetable_ISO8859_4: byte table of 48 + 6 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 6 blocks of 64 bytes (four rows each).
 * The first block is all zero; the non-zero bytes in the other blocks
 * are ISO-8859-4 byte values in the range 0x80..0xff.
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-4)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes,
 * with 0 meaning "no mapping" -- confirm against the encoder using it.
 */
3220 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3221 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3222 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3229 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3230 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3231 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3232 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3233 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3235 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3236 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3239 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3245 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3246 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3247 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3248 };
3249
/*
 * xmlunicodetable_ISO8859_5: 128 sixteen-bit entries for ISO-8859-5.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0, are mostly in the 0x04xx range, and contain no
 * 0x0000 entry.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff -- confirm against the decoder using it.
 */
3250 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3251 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3252 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3253 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3254 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3255 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3256 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3257 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3258 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3259 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3260 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3261 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3262 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3263 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3264 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3265 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3266 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3267 };
3268
/*
 * xmltranscodetable_ISO8859_5: byte table of 48 + 6 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 6 blocks of 64 bytes (four rows each).
 * The first block is all zero. The other blocks hold ISO-8859-5 byte
 * values (0x80..0xff), except for one block whose only non-zero byte is
 * a further block number (0x05).
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-5)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes
 * (one extra level for code points such as 0x2116), with 0 meaning
 * "no mapping" -- confirm against the encoder using it.
 */
3269 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3270 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3278 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3279 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3280 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3282 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3283 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3284 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3285 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3286 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 };
3298
/*
 * xmlunicodetable_ISO8859_6: 128 sixteen-bit entries for ISO-8859-6.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0, are mostly in the 0x06xx range, and include many
 * 0x0000 entries.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff, with 0x0000 marking bytes that have no
 * assigned character -- confirm against the decoder using it.
 */
3299 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3300 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3301 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3302 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3303 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3304 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3305 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3306 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3307 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3308 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3309 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3310 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3311 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3312 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3313 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3314 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3315 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3316 };
3317
/*
 * xmltranscodetable_ISO8859_6: byte table of 48 + 5 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 5 blocks of 64 bytes (four rows each).
 * The first block is all zero; the non-zero bytes in the other blocks
 * are ISO-8859-6 byte values in the range 0x80..0xff.
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-6)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes,
 * with 0 meaning "no mapping" -- confirm against the encoder using it.
 * NOTE(review): header entry 0 is 0x02, and the block at offset
 * 48 + 2 * 64 holds a single non-zero byte, 0xff, at its first position.
 * xmlunicodetable_ISO8859_6 lists 0x0000 for byte 0xff, so this looks
 * like a table-generation artifact for an unassigned slot rather than a
 * real mapping -- verify how the encoder treats it.
 */
3318 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3319 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3321 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3327 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3328 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3329 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3336 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3337 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3338 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3339 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342 };
3343
/*
 * xmlunicodetable_ISO8859_7: 128 sixteen-bit entries for ISO-8859-7.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0, are mostly in the 0x03xx range, and include six
 * 0x0000 entries (indices 0x24, 0x25, 0x2a, 0x2e, 0x52 and 0x7f).
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff, with 0x0000 marking bytes that have no
 * assigned character -- confirm against the decoder using it.
 */
3344 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3345 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3346 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3347 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3348 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3349 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3350 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3351 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3352 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3353 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3354 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3355 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3356 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3357 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3358 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3359 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3360 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3361 };
3362
/*
 * xmltranscodetable_ISO8859_7: byte table of 48 + 7 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 7 blocks of 64 bytes (four rows each).
 * The first block is all zero. The other blocks hold ISO-8859-7 byte
 * values (0x80..0xff), except for one block whose only non-zero byte is
 * a further block number (0x03).
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-7)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes
 * (one extra level for code points such as 0x2015), with 0 meaning
 * "no mapping" -- confirm against the encoder using it.
 * NOTE(review): header entry 0 is 0x04, and the block at offset
 * 48 + 4 * 64 holds a single non-zero byte, 0xff, at its first position.
 * xmlunicodetable_ISO8859_7 lists 0x0000 for byte 0xff, so this looks
 * like a table-generation artifact for an unassigned slot rather than a
 * real mapping -- verify how the encoder treats it.
 */
3363 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3364 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3372 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3373 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3374 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3375 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3388 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3389 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3390 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3391 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3392 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3394 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395 };
3396
/*
 * xmlunicodetable_ISO8859_8: 128 sixteen-bit entries for ISO-8859-8.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0 and include a long run of 0x0000 entries (indices
 * 0x3f..0x5e) plus a few isolated ones.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff, with 0x0000 marking bytes that have no
 * assigned character -- confirm against the decoder using it.
 */
3397 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3398 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3399 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3400 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3401 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3402 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3403 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3404 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3405 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3406 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3407 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3408 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3409 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3410 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3411 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3412 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3413 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3414 };
3415
/*
 * xmltranscodetable_ISO8859_8: byte table of 48 + 7 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 7 blocks of 64 bytes (four rows each).
 * The first block is all zero. The other blocks hold ISO-8859-8 byte
 * values (0x80..0xff), except for one block whose only non-zero byte is
 * a further block number (0x05).
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-8)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes
 * (one extra level for code points such as 0x2017), with 0 meaning
 * "no mapping" -- confirm against the encoder using it.
 * NOTE(review): header entry 0 is 0x02, and the block at offset
 * 48 + 2 * 64 holds a single non-zero byte, 0xff, at its first position.
 * xmlunicodetable_ISO8859_8 lists 0x0000 for byte 0xff, so this looks
 * like a table-generation artifact for an unassigned slot rather than a
 * real mapping -- verify how the encoder treats it.
 */
3416 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3417 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3419 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3425 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3426 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3427 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3428 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3436 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3441 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3446 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3447 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3448 };
3449
/*
 * xmlunicodetable_ISO8859_9: 128 sixteen-bit entries for ISO-8859-9.
 * Every entry equals 0x0080 + index except six: indices 0x50, 0x5d, 0x5e,
 * 0x70, 0x7d and 0x7e hold 0x011e, 0x0130, 0x015e, 0x011f, 0x0131 and
 * 0x015f. No entry is 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff -- confirm against the decoder using it.
 */
3450 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3451 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3452 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3453 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3454 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3455 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3456 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3457 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3458 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3459 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3460 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3461 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3462 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3463 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3464 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3465 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3466 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3467 };
3468
/*
 * xmltranscodetable_ISO8859_9: byte table of 48 + 5 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 5 blocks of 64 bytes (four rows each).
 * The first block is all zero; the non-zero bytes in the other blocks
 * are ISO-8859-9 byte values in the range 0x80..0xff.
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-9)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes,
 * with 0 meaning "no mapping" -- confirm against the encoder using it.
 */
3469 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3470 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3478 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3479 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3480 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3481 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3482 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3483 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3484 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3489 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3491 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 };
3494
/*
 * xmlunicodetable_ISO8859_10: 128 sixteen-bit entries for ISO-8859-10.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0 and contain no 0x0000 entry.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff -- confirm against the decoder using it.
 */
3495 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3496 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3497 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3498 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3499 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3500 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3501 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3502 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3503 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3504 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3505 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3506 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3507 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3508 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3509 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3510 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3511 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3512 };
3513
/*
 * xmltranscodetable_ISO8859_10: byte table of 48 + 7 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 7 blocks of 64 bytes (four rows each).
 * The first block is all zero. The other blocks hold ISO-8859-10 byte
 * values (0x80..0xff), except for one block whose only non-zero byte is
 * a further block number (0x05).
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-10)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes
 * (one extra level for code points such as 0x2015), with 0 meaning
 * "no mapping" -- confirm against the encoder using it.
 */
3514 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3515 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3523 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3524 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3525 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3526 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3527 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3528 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3529 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3530 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3531 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3532 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3533 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3534 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3543 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3544 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3545 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3546 };
3547
/*
 * xmlunicodetable_ISO8859_11: 128 sixteen-bit entries for ISO-8859-11.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0, are otherwise in the 0x0exx range, and include eight
 * 0x0000 entries (indices 0x5b..0x5e and 0x7c..0x7f).
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff, with 0x0000 marking bytes that have no
 * assigned character -- confirm against the decoder using it.
 */
3548 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3549 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3550 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3551 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3552 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3553 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3554 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3555 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3556 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3557 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3558 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3559 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3560 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3561 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3562 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3563 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3564 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3565 };
3566
/*
 * xmltranscodetable_ISO8859_11: byte table of 48 + 6 * 64 entries.
 * As laid out below: a 48-byte header (three rows of 16) holding small
 * block numbers, followed by 6 blocks of 64 bytes (four rows each).
 * The first block is all zero. The other blocks hold ISO-8859-11 byte
 * values (0x80..0xff), except for one block whose only non-zero bytes
 * are further block numbers (0x03 and 0x05).
 * NOTE(review): presumably this is the reverse (Unicode -> ISO-8859-11)
 * mapping, walked with bits of the UTF-8 lead and continuation bytes
 * (one extra level for the 0x0exx code points), with 0 meaning
 * "no mapping" -- confirm against the encoder using it.
 * NOTE(review): header entry 0 is 0x04, and the block at offset
 * 48 + 4 * 64 holds a single non-zero byte, 0xff, at its first position.
 * xmlunicodetable_ISO8859_11 lists 0x0000 for byte 0xff, so this looks
 * like a table-generation artifact for an unassigned slot rather than a
 * real mapping -- verify how the encoder treats it.
 */
3567 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3568 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3576 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3577 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3583 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3584 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3585 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3586 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3587 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3592 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3593 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3594 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3595 };
3596
/*
 * xmlunicodetable_ISO8859_13: 128 sixteen-bit entries for ISO-8859-13.
 * The first 32 entries are 0x0080..0x009f in order; the remaining 96
 * start at 0x00a0 and contain no 0x0000 entry.
 * NOTE(review): presumably indexed by (byte - 0x80) to obtain the Unicode
 * code point of bytes 0x80..0xff -- confirm against the decoder using it.
 */
3597 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3598 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3599 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3600 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3601 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3602 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3603 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3604 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3605 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3606 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3607 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3608 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3609 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3610 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3611 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3612 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3613 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3614 };
3615
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_13().
 * Declared size is 48 + 7 * 64 = 496 bytes; the initializer is 31 rows
 * of 16 bytes, so the string literal fills the array completely and no
 * terminating NUL is stored.
 * NOTE(review): presumably the leading 48 bytes select one of the
 * 64-byte blocks and a 0 entry means "no mapping"; confirm against
 * UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3649
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128 16-bit entries handed to ISO8859xToUTF8() by ISO8859_14ToUTF8().
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-14 byte 0x80 + i; confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3668
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_14().
 * Declared size is 48 + 10 * 64 = 688 bytes; the initializer is 43 rows
 * of 16 bytes, so the string literal fills the array completely and no
 * terminating NUL is stored.
 * NOTE(review): presumably the leading 48 bytes select one of the
 * 64-byte blocks and a 0 entry means "no mapping"; confirm against
 * UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3714
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128 16-bit entries handed to ISO8859xToUTF8() by ISO8859_15ToUTF8().
 * All entries equal 0x0080 + index except eight (0x20ac, 0x0160, 0x0161,
 * 0x017d, 0x017e, 0x0152, 0x0153, 0x0178).
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i; confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3733
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_15().
 * Declared size is 48 + 6 * 64 = 432 bytes; the initializer is 27 rows
 * of 16 bytes, so the string literal fills the array completely and no
 * terminating NUL is stored.
 * NOTE(review): presumably the leading 48 bytes select one of the
 * 64-byte blocks and a 0 entry means "no mapping"; confirm against
 * UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3763
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128 16-bit entries handed to ISO8859xToUTF8() by ISO8859_16ToUTF8().
 * The first 32 entries are the identity values 0x0080..0x009f.
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i; confirm against ISO8859xToUTF8().
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3782
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte table handed to UTF8ToISO8859x() by UTF8ToISO8859_16().
 * Declared size is 48 + 9 * 64 = 624 bytes; the initializer is 39 rows
 * of 16 bytes, so the string literal fills the array completely and no
 * terminating NUL is stored.
 * NOTE(review): presumably the leading 48 bytes select one of the
 * 64-byte blocks and a 0 entry means "no mapping"; confirm against
 * UTF8ToISO8859x().
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3824
3825
3826 /*
3827 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3828 */
3829
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3830 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3831 const unsigned char* in, int *inlen) {
3832 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3833 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3834 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3835 const unsigned char* in, int *inlen) {
3836 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3837 }
3838
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3839 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3840 const unsigned char* in, int *inlen) {
3841 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3842 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3843 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3844 const unsigned char* in, int *inlen) {
3845 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3846 }
3847
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3848 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3849 const unsigned char* in, int *inlen) {
3850 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3851 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3852 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3853 const unsigned char* in, int *inlen) {
3854 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3855 }
3856
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3857 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3858 const unsigned char* in, int *inlen) {
3859 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3860 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3861 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3862 const unsigned char* in, int *inlen) {
3863 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3864 }
3865
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3866 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3867 const unsigned char* in, int *inlen) {
3868 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3869 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3870 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3871 const unsigned char* in, int *inlen) {
3872 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3873 }
3874
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3875 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3876 const unsigned char* in, int *inlen) {
3877 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3878 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3879 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3880 const unsigned char* in, int *inlen) {
3881 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3882 }
3883
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3884 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3885 const unsigned char* in, int *inlen) {
3886 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3887 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3888 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3889 const unsigned char* in, int *inlen) {
3890 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3891 }
3892
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3893 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3894 const unsigned char* in, int *inlen) {
3895 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3896 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3897 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3898 const unsigned char* in, int *inlen) {
3899 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3900 }
3901
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3902 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3903 const unsigned char* in, int *inlen) {
3904 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3905 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3906 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3907 const unsigned char* in, int *inlen) {
3908 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3909 }
3910
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3911 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3912 const unsigned char* in, int *inlen) {
3913 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3914 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3915 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3916 const unsigned char* in, int *inlen) {
3917 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3918 }
3919
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3920 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3921 const unsigned char* in, int *inlen) {
3922 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3923 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3924 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3925 const unsigned char* in, int *inlen) {
3926 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3927 }
3928
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3929 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3930 const unsigned char* in, int *inlen) {
3931 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3932 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3933 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3934 const unsigned char* in, int *inlen) {
3935 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3936 }
3937
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3938 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3939 const unsigned char* in, int *inlen) {
3940 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3941 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3942 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3943 const unsigned char* in, int *inlen) {
3944 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3945 }
3946
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3947 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3948 const unsigned char* in, int *inlen) {
3949 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3950 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3951 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3952 const unsigned char* in, int *inlen) {
3953 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3954 }
3955
3956 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3957 xmlRegisterCharEncodingHandlersISO8859x (void) {
3958 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3959 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3960 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3961 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3962 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3963 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3964 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3965 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3966 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3967 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3968 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3969 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3970 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3971 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3972 }
3973
3974 #endif
3975 #endif
3976
3977 #define bottom_encoding
3978 #include "elfgcchack.h"
3979