1 /*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * daniel@veillard.com
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21 */
22
23 #define IN_LIBXML
24 #include "libxml.h"
25
26 #include <string.h>
27
28 #ifdef HAVE_CTYPE_H
29 #include <ctype.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef LIBXML_ICONV_ENABLED
35 #ifdef HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 #endif
39 #include <libxml/encoding.h>
40 #include <libxml/xmlmemory.h>
41 #ifdef LIBXML_HTML_ENABLED
42 #include <libxml/HTMLparser.h>
43 #endif
44 #include <libxml/globals.h>
45 #include <libxml/xmlerror.h>
46
47 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
49
50 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
51 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
52 struct _xmlCharEncodingAlias {
53 const char *name;
54 const char *alias;
55 };
56
57 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58 static int xmlCharEncodingAliasesNb = 0;
59 static int xmlCharEncodingAliasesMax = 0;
60
61 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
62 #if 0
63 #define DEBUG_ENCODING /* Define this to get encoding traces */
64 #endif
65 #else
66 #ifdef LIBXML_ISO8859X_ENABLED
67 static void xmlRegisterCharEncodingHandlersISO8859x (void);
68 #endif
69 #endif
70
71 static int xmlLittleEndian = 1;
72
73 /**
74 * xmlEncodingErrMemory:
75 * @extra: extra informations
76 *
77 * Handle an out of memory condition
78 */
79 static void
xmlEncodingErrMemory(const char * extra)80 xmlEncodingErrMemory(const char *extra)
81 {
82 __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83 }
84
85 /**
86 * xmlErrEncoding:
87 * @error: the error number
88 * @msg: the error message
89 *
90 * n encoding error
91 */
92 static void
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)93 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94 {
95 __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96 XML_FROM_I18N, error, XML_ERR_FATAL,
97 NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98 }
99
100 #ifdef LIBXML_ICU_ENABLED
101 static uconv_t*
openIcuConverter(const char * name,int toUnicode)102 openIcuConverter(const char* name, int toUnicode)
103 {
104 UErrorCode status = U_ZERO_ERROR;
105 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
106 if (conv == NULL)
107 return NULL;
108
109 conv->uconv = ucnv_open(name, &status);
110 if (U_FAILURE(status))
111 goto error;
112
113 status = U_ZERO_ERROR;
114 if (toUnicode) {
115 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
116 NULL, NULL, NULL, &status);
117 }
118 else {
119 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
120 NULL, NULL, NULL, &status);
121 }
122 if (U_FAILURE(status))
123 goto error;
124
125 status = U_ZERO_ERROR;
126 conv->utf8 = ucnv_open("UTF-8", &status);
127 if (U_SUCCESS(status))
128 return conv;
129
130 error:
131 if (conv->uconv)
132 ucnv_close(conv->uconv);
133 xmlFree(conv);
134 return NULL;
135 }
136
137 static void
closeIcuConverter(uconv_t * conv)138 closeIcuConverter(uconv_t *conv)
139 {
140 if (conv != NULL) {
141 ucnv_close(conv->uconv);
142 ucnv_close(conv->utf8);
143 xmlFree(conv);
144 }
145 }
146 #endif /* LIBXML_ICU_ENABLED */
147
148 /************************************************************************
149 * *
150 * Conversions To/From UTF8 encoding *
151 * *
152 ************************************************************************/
153
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops as soon as fewer than six bytes
 * would remain free in @out, so small output buffers may see no progress.
 *
 * Returns the number of bytes written if success, or -1 if a byte
 *     outside of the ASCII range was found.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int done = 0;   /* bytes copied so far; input and output advance together */
    int ret;

    for (;;) {
        if ((done >= available) || (done + 5 >= capacity)) {
            ret = done;
            break;
        }
        if (in[done] >= 0x80) {
            /* not an ASCII byte: report what was converted before it */
            ret = -1;
            break;
        }
        out[done] = in[done];
        done++;
    }

    *outlen = done;
    *inlen = done;
    return(ret);
}
198
199 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Any multi-byte sequence makes the conversion fail: a well-formed one
 * encodes a code point above 0x7F, which has no ASCII equivalent, and
 * everything else (stray or missing continuation bytes, overlong forms)
 * is malformed UTF-8.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. If @in is NULL the
 *     call is an initialization: both lengths are set to 0 and 0 is
 *     returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int d;
    int trailing;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = (unsigned char) d;
            processed = in;
            continue;
        }

        if ((d < 0xC0) || (d >= 0xF8)) {
            /* trailing byte in leading position, or invalid lead byte */
            ret = -2;
            break;
        }
        if (d < 0xE0)      trailing = 1;
        else if (d < 0xF0) trailing = 2;
        else               trailing = 3;

        /*
         * As before, a sequence that is merely cut off by the end of
         * the input is left unconsumed instead of being reported.
         */
        if (inend - in < trailing)
            break;

        /* a complete multi-byte sequence: no chance for this in Ascii */
        ret = -2;
        break;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    if (ret < 0)
        return(ret);
    return(*outlen);
}
283 #endif /* LIBXML_OUTPUT_ENABLED */
284
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the others become a
 * two-byte sequence. Conversion stops at the first input byte whose
 * encoding no longer fits in @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 *     arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst;
    const unsigned char *src;
    const unsigned char *srcEnd;
    unsigned char *dstEnd;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dst = out;
    src = in;
    dstEnd = out + *outlen;
    srcEnd = in + (*inlen);

    while (src < srcEnd) {
        const unsigned char ch = *src;

        if (ch < 0x80) {
            if (dstEnd - dst < 1)
                break;
            *dst++ = ch;
        } else {
            if (dstEnd - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
333
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb, without looking at them.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the number of bytes to copy is negative.
 * The values of *@outlen and *@inlenb after a successful return are both
 * the number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
369
370
371 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Only single bytes and two-byte sequences encoding U+0080..U+00FF can be
 * converted. Malformed input (stray or invalid continuation bytes,
 * overlong encodings) and code points above U+00FF make the conversion
 * fail. A sequence cut off by the end of @in is left unconsumed.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. If @in is NULL the
 *     call is an initialization: both lengths are set to 0 and 0 is
 *     returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing, extra;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;   /* no chance for this in IsoLat1 */

        /* incomplete sequence: stop here and leave it unconsumed */
        if (inend - in < trailing)
            break;

        extra = trailing;
        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /*
         * c is now a single code point. Latin 1 covers U+0000..U+00FF,
         * whose shortest encodings use one or two bytes; longer or
         * overlong forms are rejected rather than silently accepted.
         */
        if ((c > 0xFF) || (extra > 1) || ((extra == 1) && (c < 0x80)))
            goto fail;

        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

fail:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
461 #endif /* LIBXML_OUTPUT_ENABLED */
462
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out.
 *
 * The input is decoded byte by byte, so the result depends neither on the
 * byte order of the machine nor on the alignment of @inb. An odd trailing
 * byte is ignored. Conversion stops before a character that does not fit
 * completely in @out, and before a high surrogate whose low surrogate is
 * not in the buffer yet; in both cases the character is left unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;   /* start of the next unconverted char */
    const unsigned char *next;
    const unsigned char *inend;
    unsigned int c, d;
    int need;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /* c is a single code point: make sure all of it fits */
        if      (c < 0x80)    need = 1;
        else if (c < 0x800)   need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
550
551 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is written.
 *
 * The output is stored byte by byte, so the result depends neither on the
 * byte order of the machine nor on the alignment of @outb. Conversion
 * stops before a character that does not fit completely in @outb and
 * before a sequence cut off by the end of @in; such a character is left
 * unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (invalid lead or continuation
 *     byte, or a code point above U+10FFFF). If @in is NULL the call is
 *     an initialization: both lengths are set to 0 and 0 is returned.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be produced */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;   /* no chance for this in UTF-16 */

        /* incomplete sequence: stop here and leave it unconsumed */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto fail;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

fail:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
660
661 /**
662 * UTF8ToUTF16:
663 * @outb: a pointer to an array of bytes to store the result
664 * @outlen: the length of @outb
665 * @in: a pointer to an array of UTF-8 chars
666 * @inlen: the length of @in
667 *
668 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
669 * block of chars out.
670 *
671 * Returns the number of bytes written, or -1 if lack of space, or -2
672 * if the transcoding failed.
673 */
674 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)675 UTF8ToUTF16(unsigned char* outb, int *outlen,
676 const unsigned char* in, int *inlen)
677 {
678 if (in == NULL) {
679 /*
680 * initialization, add the Byte Order Mark for UTF-16LE
681 */
682 if (*outlen >= 2) {
683 outb[0] = 0xFF;
684 outb[1] = 0xFE;
685 *outlen = 2;
686 *inlen = 0;
687 #ifdef DEBUG_ENCODING
688 xmlGenericError(xmlGenericErrorContext,
689 "Added FFFE Byte Order Mark\n");
690 #endif
691 return(2);
692 }
693 *outlen = 0;
694 *inlen = 0;
695 return(0);
696 }
697 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
698 }
699 #endif /* LIBXML_OUTPUT_ENABLED */
700
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out.
 *
 * The input is decoded byte by byte, so the result depends neither on the
 * byte order of the machine nor on the alignment of @inb. An odd trailing
 * byte is ignored. Conversion stops before a character that does not fit
 * completely in @out, and before a high surrogate whose low surrogate is
 * not in the buffer yet; in both cases the character is left unconsumed,
 * which matches UTF16LEToUTF8().
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a low
 *     surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
              const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;   /* start of the next unconverted char */
    const unsigned char *next;
    const unsigned char *inend;
    unsigned int c, d;
    int need;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    /* only whole 16-bit units are looked at */
    inend = inb + (*inlenb - (*inlenb % 2));

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = 0x10000 + (((c & 0x03FF) << 10) | (d & 0x03FF));
        }

        /*
         * c is a single code point: make sure all of it fits, so that a
         * truncated sequence is never emitted for a consumed character.
         */
        if      (c < 0x80)    need = 1;
        else if (c < 0x800)   need = 2;
        else if (c < 0x10000) need = 3;
        else                  need = 4;
        if (outend - out < need)
            break;

        switch (need) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (0xC0 | (c >> 6));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            case 3:
                *out++ = (unsigned char) (0xE0 | (c >> 12));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
            default:
                *out++ = (unsigned char) (0xF0 | (c >> 18));
                *out++ = (unsigned char) (0x80 | ((c >> 12) & 0x3F));
                *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
                *out++ = (unsigned char) (0x80 | (c & 0x3F));
                break;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
792
793 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is written.
 *
 * The output is stored byte by byte, so the result depends neither on the
 * byte order of the machine nor on the alignment of @outb. Conversion
 * stops before a character that does not fit completely in @outb and
 * before a sequence cut off by the end of @in; such a character is left
 * unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (invalid lead or continuation
 *     byte, or a code point above U+10FFFF). If @in is NULL the call is
 *     an initialization: both lengths are set to 0 and 0 is returned.
 * The value of *@inlen after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced,
 * on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
              const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d, hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be produced */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto fail;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto fail;   /* no chance for this in UTF-16 */

        /* incomplete sequence: stop here and leave it unconsumed */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80)
                goto fail;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* c is now a single code point */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto fail;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

fail:
    /* byte count, like on the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
899 #endif /* LIBXML_OUTPUT_ENABLED */
900
901 /************************************************************************
902 * *
903 * Generic encoding handling routines *
904 * *
905 ************************************************************************/
906
907 /**
908 * xmlDetectCharEncoding:
909 * @in: a pointer to the first bytes of the XML entity, must be at least
910 * 2 bytes long (at least 4 if encoding is UTF4 variant).
911 * @len: pointer to the length of the buffer
912 *
913 * Guess the encoding of the entity using the first bytes of the entity content
914 * according to the non-normative appendix F of the XML-1.0 recommendation.
915 *
916 * Returns one of the XML_CHAR_ENCODING_... values.
917 */
918 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)919 xmlDetectCharEncoding(const unsigned char* in, int len)
920 {
921 if (in == NULL)
922 return(XML_CHAR_ENCODING_NONE);
923 if (len >= 4) {
924 if ((in[0] == 0x00) && (in[1] == 0x00) &&
925 (in[2] == 0x00) && (in[3] == 0x3C))
926 return(XML_CHAR_ENCODING_UCS4BE);
927 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
928 (in[2] == 0x00) && (in[3] == 0x00))
929 return(XML_CHAR_ENCODING_UCS4LE);
930 if ((in[0] == 0x00) && (in[1] == 0x00) &&
931 (in[2] == 0x3C) && (in[3] == 0x00))
932 return(XML_CHAR_ENCODING_UCS4_2143);
933 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
934 (in[2] == 0x00) && (in[3] == 0x00))
935 return(XML_CHAR_ENCODING_UCS4_3412);
936 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
937 (in[2] == 0xA7) && (in[3] == 0x94))
938 return(XML_CHAR_ENCODING_EBCDIC);
939 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
940 (in[2] == 0x78) && (in[3] == 0x6D))
941 return(XML_CHAR_ENCODING_UTF8);
942 /*
943 * Although not part of the recommendation, we also
944 * attempt an "auto-recognition" of UTF-16LE and
945 * UTF-16BE encodings.
946 */
947 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
948 (in[2] == 0x3F) && (in[3] == 0x00))
949 return(XML_CHAR_ENCODING_UTF16LE);
950 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
951 (in[2] == 0x00) && (in[3] == 0x3F))
952 return(XML_CHAR_ENCODING_UTF16BE);
953 }
954 if (len >= 3) {
955 /*
956 * Errata on XML-1.0 June 20 2001
957 * We now allow an UTF8 encoded BOM
958 */
959 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
960 (in[2] == 0xBF))
961 return(XML_CHAR_ENCODING_UTF8);
962 }
963 /* For UTF-16 we can recognize by the BOM */
964 if (len >= 2) {
965 if ((in[0] == 0xFE) && (in[1] == 0xFF))
966 return(XML_CHAR_ENCODING_UTF16BE);
967 if ((in[0] == 0xFF) && (in[1] == 0xFE))
968 return(XML_CHAR_ENCODING_UTF16LE);
969 }
970 return(XML_CHAR_ENCODING_NONE);
971 }
972
973 /**
974 * xmlCleanupEncodingAliases:
975 *
976 * Unregisters all aliases
977 */
978 void
xmlCleanupEncodingAliases(void)979 xmlCleanupEncodingAliases(void) {
980 int i;
981
982 if (xmlCharEncodingAliases == NULL)
983 return;
984
985 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
986 if (xmlCharEncodingAliases[i].name != NULL)
987 xmlFree((char *) xmlCharEncodingAliases[i].name);
988 if (xmlCharEncodingAliases[i].alias != NULL)
989 xmlFree((char *) xmlCharEncodingAliases[i].alias);
990 }
991 xmlCharEncodingAliasesNb = 0;
992 xmlCharEncodingAliasesMax = 0;
993 xmlFree(xmlCharEncodingAliases);
994 xmlCharEncodingAliases = NULL;
995 }
996
997 /**
998 * xmlGetEncodingAlias:
999 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1000 *
1001 * Lookup an encoding name for the given alias.
1002 *
1003 * Returns NULL if not found, otherwise the original name
1004 */
1005 const char *
xmlGetEncodingAlias(const char * alias)1006 xmlGetEncodingAlias(const char *alias) {
1007 int i;
1008 char upper[100];
1009
1010 if (alias == NULL)
1011 return(NULL);
1012
1013 if (xmlCharEncodingAliases == NULL)
1014 return(NULL);
1015
1016 for (i = 0;i < 99;i++) {
1017 upper[i] = toupper(alias[i]);
1018 if (upper[i] == 0) break;
1019 }
1020 upper[i] = 0;
1021
1022 /*
1023 * Walk down the list looking for a definition of the alias
1024 */
1025 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027 return(xmlCharEncodingAliases[i].name);
1028 }
1029 }
1030 return(NULL);
1031 }
1032
1033 /**
1034 * xmlAddEncodingAlias:
1035 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1036 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1037 *
1038 * Registers an alias @alias for an encoding named @name. Existing alias
1039 * will be overwritten.
1040 *
1041 * Returns 0 in case of success, -1 in case of error
1042 */
1043 int
xmlAddEncodingAlias(const char * name,const char * alias)1044 xmlAddEncodingAlias(const char *name, const char *alias) {
1045 int i;
1046 char upper[100];
1047
1048 if ((name == NULL) || (alias == NULL))
1049 return(-1);
1050
1051 for (i = 0;i < 99;i++) {
1052 upper[i] = toupper(alias[i]);
1053 if (upper[i] == 0) break;
1054 }
1055 upper[i] = 0;
1056
1057 if (xmlCharEncodingAliases == NULL) {
1058 xmlCharEncodingAliasesNb = 0;
1059 xmlCharEncodingAliasesMax = 20;
1060 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1061 xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062 if (xmlCharEncodingAliases == NULL)
1063 return(-1);
1064 } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1065 xmlCharEncodingAliasesMax *= 2;
1066 xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1067 xmlRealloc(xmlCharEncodingAliases,
1068 xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1069 }
1070 /*
1071 * Walk down the list looking for a definition of the alias
1072 */
1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075 /*
1076 * Replace the definition.
1077 */
1078 xmlFree((char *) xmlCharEncodingAliases[i].name);
1079 xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1080 return(0);
1081 }
1082 }
1083 /*
1084 * Add the definition
1085 */
1086 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088 xmlCharEncodingAliasesNb++;
1089 return(0);
1090 }
1091
1092 /**
1093 * xmlDelEncodingAlias:
1094 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1095 *
1096 * Unregisters an encoding alias @alias
1097 *
1098 * Returns 0 in case of success, -1 in case of error
1099 */
1100 int
xmlDelEncodingAlias(const char * alias)1101 xmlDelEncodingAlias(const char *alias) {
1102 int i;
1103
1104 if (alias == NULL)
1105 return(-1);
1106
1107 if (xmlCharEncodingAliases == NULL)
1108 return(-1);
1109 /*
1110 * Walk down the list looking for a definition of the alias
1111 */
1112 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1113 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114 xmlFree((char *) xmlCharEncodingAliases[i].name);
1115 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1116 xmlCharEncodingAliasesNb--;
1117 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1119 return(0);
1120 }
1121 }
1122 return(-1);
1123 }
1124
1125 /**
1126 * xmlParseCharEncoding:
1127 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1128 *
1129 * Compare the string to the encoding schemes already known. Note
1130 * that the comparison is case insensitive accordingly to the section
1131 * [XML] 4.3.3 Character Encoding in Entities.
1132 *
1133 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134 * if not recognized.
1135 */
1136 xmlCharEncoding
xmlParseCharEncoding(const char * name)1137 xmlParseCharEncoding(const char* name)
1138 {
1139 const char *alias;
1140 char upper[500];
1141 int i;
1142
1143 if (name == NULL)
1144 return(XML_CHAR_ENCODING_NONE);
1145
1146 /*
1147 * Do the alias resolution
1148 */
1149 alias = xmlGetEncodingAlias(name);
1150 if (alias != NULL)
1151 name = alias;
1152
1153 for (i = 0;i < 499;i++) {
1154 upper[i] = toupper(name[i]);
1155 if (upper[i] == 0) break;
1156 }
1157 upper[i] = 0;
1158
1159 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1162
1163 /*
1164 * NOTE: if we were able to parse this, the endianness of UTF16 is
1165 * already found and in use
1166 */
1167 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1169
1170 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1173
1174 /*
1175 * NOTE: if we were able to parse this, the endianness of UCS4 is
1176 * already found and in use
1177 */
1178 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1181
1182
1183 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1186
1187 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1190
1191 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1198
1199 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1200 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1202
1203 #ifdef DEBUG_ENCODING
1204 xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1205 #endif
1206 return(XML_CHAR_ENCODING_ERROR);
1207 }
1208
1209 /**
1210 * xmlGetCharEncodingName:
1211 * @enc: the encoding
1212 *
1213 * The "canonical" name for XML encoding.
1214 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215 * Section 4.3.3 Character Encoding in Entities
1216 *
1217 * Returns the canonical name for the given encoding
1218 */
1219
1220 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1221 xmlGetCharEncodingName(xmlCharEncoding enc) {
1222 switch (enc) {
1223 case XML_CHAR_ENCODING_ERROR:
1224 return(NULL);
1225 case XML_CHAR_ENCODING_NONE:
1226 return(NULL);
1227 case XML_CHAR_ENCODING_UTF8:
1228 return("UTF-8");
1229 case XML_CHAR_ENCODING_UTF16LE:
1230 return("UTF-16");
1231 case XML_CHAR_ENCODING_UTF16BE:
1232 return("UTF-16");
1233 case XML_CHAR_ENCODING_EBCDIC:
1234 return("EBCDIC");
1235 case XML_CHAR_ENCODING_UCS4LE:
1236 return("ISO-10646-UCS-4");
1237 case XML_CHAR_ENCODING_UCS4BE:
1238 return("ISO-10646-UCS-4");
1239 case XML_CHAR_ENCODING_UCS4_2143:
1240 return("ISO-10646-UCS-4");
1241 case XML_CHAR_ENCODING_UCS4_3412:
1242 return("ISO-10646-UCS-4");
1243 case XML_CHAR_ENCODING_UCS2:
1244 return("ISO-10646-UCS-2");
1245 case XML_CHAR_ENCODING_8859_1:
1246 return("ISO-8859-1");
1247 case XML_CHAR_ENCODING_8859_2:
1248 return("ISO-8859-2");
1249 case XML_CHAR_ENCODING_8859_3:
1250 return("ISO-8859-3");
1251 case XML_CHAR_ENCODING_8859_4:
1252 return("ISO-8859-4");
1253 case XML_CHAR_ENCODING_8859_5:
1254 return("ISO-8859-5");
1255 case XML_CHAR_ENCODING_8859_6:
1256 return("ISO-8859-6");
1257 case XML_CHAR_ENCODING_8859_7:
1258 return("ISO-8859-7");
1259 case XML_CHAR_ENCODING_8859_8:
1260 return("ISO-8859-8");
1261 case XML_CHAR_ENCODING_8859_9:
1262 return("ISO-8859-9");
1263 case XML_CHAR_ENCODING_2022_JP:
1264 return("ISO-2022-JP");
1265 case XML_CHAR_ENCODING_SHIFT_JIS:
1266 return("Shift-JIS");
1267 case XML_CHAR_ENCODING_EUC_JP:
1268 return("EUC-JP");
1269 case XML_CHAR_ENCODING_ASCII:
1270 return(NULL);
1271 }
1272 return(NULL);
1273 }
1274
1275 /************************************************************************
1276 * *
1277 * Char encoding handlers *
1278 * *
1279 ************************************************************************/
1280
1281
/*
 * Registry of the char encoding handlers.
 * The table is allocated once by xmlInitCharEncodingHandlers() with room
 * for MAX_ENCODING_HANDLERS entries; the size should be growable, but
 * it's not a big deal ...
 */
#define MAX_ENCODING_HANDLERS 50
/* the table itself, NULL until the encoding support is initialized */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* number of entries of the table currently in use */
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns for a NULL or
 * empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1293
1294 /**
1295 * xmlNewCharEncodingHandler:
1296 * @name: the encoding name, in UTF-8 format (ASCII actually)
1297 * @input: the xmlCharEncodingInputFunc to read that encoding
1298 * @output: the xmlCharEncodingOutputFunc to write that encoding
1299 *
1300 * Create and registers an xmlCharEncodingHandler.
1301 *
1302 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1303 */
1304 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1305 xmlNewCharEncodingHandler(const char *name,
1306 xmlCharEncodingInputFunc input,
1307 xmlCharEncodingOutputFunc output) {
1308 xmlCharEncodingHandlerPtr handler;
1309 const char *alias;
1310 char upper[500];
1311 int i;
1312 char *up = NULL;
1313
1314 /*
1315 * Do the alias resolution
1316 */
1317 alias = xmlGetEncodingAlias(name);
1318 if (alias != NULL)
1319 name = alias;
1320
1321 /*
1322 * Keep only the uppercase version of the encoding.
1323 */
1324 if (name == NULL) {
1325 xmlEncodingErr(XML_I18N_NO_NAME,
1326 "xmlNewCharEncodingHandler : no name !\n", NULL);
1327 return(NULL);
1328 }
1329 for (i = 0;i < 499;i++) {
1330 upper[i] = toupper(name[i]);
1331 if (upper[i] == 0) break;
1332 }
1333 upper[i] = 0;
1334 up = xmlMemStrdup(upper);
1335 if (up == NULL) {
1336 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1337 return(NULL);
1338 }
1339
1340 /*
1341 * allocate and fill-up an handler block.
1342 */
1343 handler = (xmlCharEncodingHandlerPtr)
1344 xmlMalloc(sizeof(xmlCharEncodingHandler));
1345 if (handler == NULL) {
1346 xmlFree(up);
1347 xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1348 return(NULL);
1349 }
1350 handler->input = input;
1351 handler->output = output;
1352 handler->name = up;
1353
1354 #ifdef LIBXML_ICONV_ENABLED
1355 handler->iconv_in = NULL;
1356 handler->iconv_out = NULL;
1357 #endif
1358 #ifdef LIBXML_ICU_ENABLED
1359 handler->uconv_in = NULL;
1360 handler->uconv_out = NULL;
1361 #endif
1362
1363 /*
1364 * registers and returns the handler.
1365 */
1366 xmlRegisterCharEncodingHandler(handler);
1367 #ifdef DEBUG_ENCODING
1368 xmlGenericError(xmlGenericErrorContext,
1369 "Registered encoding handler for %s\n", name);
1370 #endif
1371 return(handler);
1372 }
1373
1374 /**
1375 * xmlInitCharEncodingHandlers:
1376 *
1377 * Initialize the char encoding support, it registers the default
1378 * encoding supported.
1379 * NOTE: while public, this function usually doesn't need to be called
1380 * in normal processing.
1381 */
1382 void
xmlInitCharEncodingHandlers(void)1383 xmlInitCharEncodingHandlers(void) {
1384 unsigned short int tst = 0x1234;
1385 unsigned char *ptr = (unsigned char *) &tst;
1386
1387 if (handlers != NULL) return;
1388
1389 handlers = (xmlCharEncodingHandlerPtr *)
1390 xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1391
1392 if (*ptr == 0x12) xmlLittleEndian = 0;
1393 else if (*ptr == 0x34) xmlLittleEndian = 1;
1394 else {
1395 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1396 "Odd problem at endianness detection\n", NULL);
1397 }
1398
1399 if (handlers == NULL) {
1400 xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1401 return;
1402 }
1403 xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1404 #ifdef LIBXML_OUTPUT_ENABLED
1405 xmlUTF16LEHandler =
1406 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1407 xmlUTF16BEHandler =
1408 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1409 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1410 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1411 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1412 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1413 #ifdef LIBXML_HTML_ENABLED
1414 xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1415 #endif
1416 #else
1417 xmlUTF16LEHandler =
1418 xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1419 xmlUTF16BEHandler =
1420 xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1421 xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1422 xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1423 xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1424 xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1425 #endif /* LIBXML_OUTPUT_ENABLED */
1426 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1427 #ifdef LIBXML_ISO8859X_ENABLED
1428 xmlRegisterCharEncodingHandlersISO8859x ();
1429 #endif
1430 #endif
1431
1432 }
1433
1434 /**
1435 * xmlCleanupCharEncodingHandlers:
1436 *
1437 * Cleanup the memory allocated for the char encoding support, it
1438 * unregisters all the encoding handlers and the aliases.
1439 */
1440 void
xmlCleanupCharEncodingHandlers(void)1441 xmlCleanupCharEncodingHandlers(void) {
1442 xmlCleanupEncodingAliases();
1443
1444 if (handlers == NULL) return;
1445
1446 for (;nbCharEncodingHandler > 0;) {
1447 nbCharEncodingHandler--;
1448 if (handlers[nbCharEncodingHandler] != NULL) {
1449 if (handlers[nbCharEncodingHandler]->name != NULL)
1450 xmlFree(handlers[nbCharEncodingHandler]->name);
1451 xmlFree(handlers[nbCharEncodingHandler]);
1452 }
1453 }
1454 xmlFree(handlers);
1455 handlers = NULL;
1456 nbCharEncodingHandler = 0;
1457 xmlDefaultCharEncodingHandler = NULL;
1458 }
1459
1460 /**
1461 * xmlRegisterCharEncodingHandler:
1462 * @handler: the xmlCharEncodingHandlerPtr handler block
1463 *
1464 * Register the char encoding handler, surprising, isn't it ?
1465 */
1466 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1467 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1468 if (handlers == NULL) xmlInitCharEncodingHandlers();
1469 if (handler == NULL) {
1470 xmlEncodingErr(XML_I18N_NO_HANDLER,
1471 "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1472 return;
1473 }
1474
1475 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1476 xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1477 "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1478 "MAX_ENCODING_HANDLERS");
1479 return;
1480 }
1481 handlers[nbCharEncodingHandler++] = handler;
1482 }
1483
1484 /**
1485 * xmlGetCharEncodingHandler:
1486 * @enc: an xmlCharEncoding value.
1487 *
1488 * Search in the registered set the handler able to read/write that encoding.
1489 *
1490 * Returns the handler or NULL if not found
1491 */
1492 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1493 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1494 xmlCharEncodingHandlerPtr handler;
1495
1496 if (handlers == NULL) xmlInitCharEncodingHandlers();
1497 switch (enc) {
1498 case XML_CHAR_ENCODING_ERROR:
1499 return(NULL);
1500 case XML_CHAR_ENCODING_NONE:
1501 return(NULL);
1502 case XML_CHAR_ENCODING_UTF8:
1503 return(NULL);
1504 case XML_CHAR_ENCODING_UTF16LE:
1505 return(xmlUTF16LEHandler);
1506 case XML_CHAR_ENCODING_UTF16BE:
1507 return(xmlUTF16BEHandler);
1508 case XML_CHAR_ENCODING_EBCDIC:
1509 handler = xmlFindCharEncodingHandler("EBCDIC");
1510 if (handler != NULL) return(handler);
1511 handler = xmlFindCharEncodingHandler("ebcdic");
1512 if (handler != NULL) return(handler);
1513 break;
1514 case XML_CHAR_ENCODING_UCS4BE:
1515 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1516 if (handler != NULL) return(handler);
1517 handler = xmlFindCharEncodingHandler("UCS-4");
1518 if (handler != NULL) return(handler);
1519 handler = xmlFindCharEncodingHandler("UCS4");
1520 if (handler != NULL) return(handler);
1521 break;
1522 case XML_CHAR_ENCODING_UCS4LE:
1523 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1524 if (handler != NULL) return(handler);
1525 handler = xmlFindCharEncodingHandler("UCS-4");
1526 if (handler != NULL) return(handler);
1527 handler = xmlFindCharEncodingHandler("UCS4");
1528 if (handler != NULL) return(handler);
1529 break;
1530 case XML_CHAR_ENCODING_UCS4_2143:
1531 break;
1532 case XML_CHAR_ENCODING_UCS4_3412:
1533 break;
1534 case XML_CHAR_ENCODING_UCS2:
1535 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1536 if (handler != NULL) return(handler);
1537 handler = xmlFindCharEncodingHandler("UCS-2");
1538 if (handler != NULL) return(handler);
1539 handler = xmlFindCharEncodingHandler("UCS2");
1540 if (handler != NULL) return(handler);
1541 break;
1542
1543 /*
1544 * We used to keep ISO Latin encodings native in the
1545 * generated data. This led to so many problems that
1546 * this has been removed. One can still change this
1547 * back by registering no-ops encoders for those
1548 */
1549 case XML_CHAR_ENCODING_8859_1:
1550 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1551 if (handler != NULL) return(handler);
1552 break;
1553 case XML_CHAR_ENCODING_8859_2:
1554 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1555 if (handler != NULL) return(handler);
1556 break;
1557 case XML_CHAR_ENCODING_8859_3:
1558 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1559 if (handler != NULL) return(handler);
1560 break;
1561 case XML_CHAR_ENCODING_8859_4:
1562 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1563 if (handler != NULL) return(handler);
1564 break;
1565 case XML_CHAR_ENCODING_8859_5:
1566 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1567 if (handler != NULL) return(handler);
1568 break;
1569 case XML_CHAR_ENCODING_8859_6:
1570 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1571 if (handler != NULL) return(handler);
1572 break;
1573 case XML_CHAR_ENCODING_8859_7:
1574 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1575 if (handler != NULL) return(handler);
1576 break;
1577 case XML_CHAR_ENCODING_8859_8:
1578 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1579 if (handler != NULL) return(handler);
1580 break;
1581 case XML_CHAR_ENCODING_8859_9:
1582 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1583 if (handler != NULL) return(handler);
1584 break;
1585
1586
1587 case XML_CHAR_ENCODING_2022_JP:
1588 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1589 if (handler != NULL) return(handler);
1590 break;
1591 case XML_CHAR_ENCODING_SHIFT_JIS:
1592 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1593 if (handler != NULL) return(handler);
1594 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1595 if (handler != NULL) return(handler);
1596 handler = xmlFindCharEncodingHandler("Shift_JIS");
1597 if (handler != NULL) return(handler);
1598 break;
1599 case XML_CHAR_ENCODING_EUC_JP:
1600 handler = xmlFindCharEncodingHandler("EUC-JP");
1601 if (handler != NULL) return(handler);
1602 break;
1603 default:
1604 break;
1605 }
1606
1607 #ifdef DEBUG_ENCODING
1608 xmlGenericError(xmlGenericErrorContext,
1609 "No handler found for encoding %d\n", enc);
1610 #endif
1611 return(NULL);
1612 }
1613
1614 /**
1615 * xmlFindCharEncodingHandler:
1616 * @name: a string describing the char encoding.
1617 *
1618 * Search in the registered set the handler able to read/write that encoding.
1619 *
1620 * Returns the handler or NULL if not found
1621 */
1622 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1623 xmlFindCharEncodingHandler(const char *name) {
1624 const char *nalias;
1625 const char *norig;
1626 xmlCharEncoding alias;
1627 #ifdef LIBXML_ICONV_ENABLED
1628 xmlCharEncodingHandlerPtr enc;
1629 iconv_t icv_in, icv_out;
1630 #endif /* LIBXML_ICONV_ENABLED */
1631 #ifdef LIBXML_ICU_ENABLED
1632 xmlCharEncodingHandlerPtr enc;
1633 uconv_t *ucv_in, *ucv_out;
1634 #endif /* LIBXML_ICU_ENABLED */
1635 char upper[100];
1636 int i;
1637
1638 if (handlers == NULL) xmlInitCharEncodingHandlers();
1639 if (name == NULL) return(xmlDefaultCharEncodingHandler);
1640 if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1641
1642 /*
1643 * Do the alias resolution
1644 */
1645 norig = name;
1646 nalias = xmlGetEncodingAlias(name);
1647 if (nalias != NULL)
1648 name = nalias;
1649
1650 /*
1651 * Check first for directly registered encoding names
1652 */
1653 for (i = 0;i < 99;i++) {
1654 upper[i] = toupper(name[i]);
1655 if (upper[i] == 0) break;
1656 }
1657 upper[i] = 0;
1658
1659 for (i = 0;i < nbCharEncodingHandler; i++)
1660 if (!strcmp(upper, handlers[i]->name)) {
1661 #ifdef DEBUG_ENCODING
1662 xmlGenericError(xmlGenericErrorContext,
1663 "Found registered handler for encoding %s\n", name);
1664 #endif
1665 return(handlers[i]);
1666 }
1667
1668 #ifdef LIBXML_ICONV_ENABLED
1669 /* check whether iconv can handle this */
1670 icv_in = iconv_open("UTF-8", name);
1671 icv_out = iconv_open(name, "UTF-8");
1672 if (icv_in == (iconv_t) -1) {
1673 icv_in = iconv_open("UTF-8", upper);
1674 }
1675 if (icv_out == (iconv_t) -1) {
1676 icv_out = iconv_open(upper, "UTF-8");
1677 }
1678 if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1679 enc = (xmlCharEncodingHandlerPtr)
1680 xmlMalloc(sizeof(xmlCharEncodingHandler));
1681 if (enc == NULL) {
1682 iconv_close(icv_in);
1683 iconv_close(icv_out);
1684 return(NULL);
1685 }
1686 enc->name = xmlMemStrdup(name);
1687 enc->input = NULL;
1688 enc->output = NULL;
1689 enc->iconv_in = icv_in;
1690 enc->iconv_out = icv_out;
1691 #ifdef DEBUG_ENCODING
1692 xmlGenericError(xmlGenericErrorContext,
1693 "Found iconv handler for encoding %s\n", name);
1694 #endif
1695 return enc;
1696 } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1697 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1698 "iconv : problems with filters for '%s'\n", name);
1699 }
1700 #endif /* LIBXML_ICONV_ENABLED */
1701 #ifdef LIBXML_ICU_ENABLED
1702 /* check whether icu can handle this */
1703 ucv_in = openIcuConverter(name, 1);
1704 ucv_out = openIcuConverter(name, 0);
1705 if (ucv_in != NULL && ucv_out != NULL) {
1706 enc = (xmlCharEncodingHandlerPtr)
1707 xmlMalloc(sizeof(xmlCharEncodingHandler));
1708 if (enc == NULL) {
1709 closeIcuConverter(ucv_in);
1710 closeIcuConverter(ucv_out);
1711 return(NULL);
1712 }
1713 enc->name = xmlMemStrdup(name);
1714 enc->input = NULL;
1715 enc->output = NULL;
1716 enc->uconv_in = ucv_in;
1717 enc->uconv_out = ucv_out;
1718 #ifdef DEBUG_ENCODING
1719 xmlGenericError(xmlGenericErrorContext,
1720 "Found ICU converter handler for encoding %s\n", name);
1721 #endif
1722 return enc;
1723 } else if (ucv_in != NULL || ucv_out != NULL) {
1724 closeIcuConverter(ucv_in);
1725 closeIcuConverter(ucv_out);
1726 xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1727 "ICU converter : problems with filters for '%s'\n", name);
1728 }
1729 #endif /* LIBXML_ICU_ENABLED */
1730
1731 #ifdef DEBUG_ENCODING
1732 xmlGenericError(xmlGenericErrorContext,
1733 "No handler found for encoding %s\n", name);
1734 #endif
1735
1736 /*
1737 * Fallback using the canonical names
1738 */
1739 alias = xmlParseCharEncoding(norig);
1740 if (alias != XML_CHAR_ENCODING_ERROR) {
1741 const char* canon;
1742 canon = xmlGetCharEncodingName(alias);
1743 if ((canon != NULL) && (strcmp(name, canon))) {
1744 return(xmlFindCharEncodingHandler(canon));
1745 }
1746 }
1747
1748 /* If "none of the above", give up */
1749 return(NULL);
1750 }
1751
1752 /************************************************************************
1753 * *
1754 * ICONV based generic conversion functions *
1755 * *
1756 ************************************************************************/
1757
1758 #ifdef LIBXML_ICONV_ENABLED
1759 /**
1760 * xmlIconvWrapper:
1761 * @cd: iconv converter data structure
1762 * @out: a pointer to an array of bytes to store the result
1763 * @outlen: the length of @out
1764 * @in: a pointer to an array of ISO Latin 1 chars
1765 * @inlen: the length of @in
1766 *
1767 * Returns 0 if success, or
1768 * -1 by lack of space, or
1769 * -2 if the transcoding fails (for *in is not valid utf8 string or
1770 * the result of transformation can't fit into the encoding we want), or
1771 * -3 if there the last byte can't form a single output char.
1772 *
1773 * The value of @inlen after return is the number of octets consumed
1774 * as the return value is positive, else unpredictable.
1775 * The value of @outlen after return is the number of ocetes consumed.
1776 */
1777 static int
xmlIconvWrapper(iconv_t cd,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1778 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1779 const unsigned char *in, int *inlen) {
1780 size_t icv_inlen, icv_outlen;
1781 const char *icv_in = (const char *) in;
1782 char *icv_out = (char *) out;
1783 int ret;
1784
1785 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1786 if (outlen != NULL) *outlen = 0;
1787 return(-1);
1788 }
1789 icv_inlen = *inlen;
1790 icv_outlen = *outlen;
1791 ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1792 *inlen -= icv_inlen;
1793 *outlen -= icv_outlen;
1794 if ((icv_inlen != 0) || (ret == -1)) {
1795 #ifdef EILSEQ
1796 if (errno == EILSEQ) {
1797 return -2;
1798 } else
1799 #endif
1800 #ifdef E2BIG
1801 if (errno == E2BIG) {
1802 return -1;
1803 } else
1804 #endif
1805 #ifdef EINVAL
1806 if (errno == EINVAL) {
1807 return -3;
1808 } else
1809 #endif
1810 {
1811 return -3;
1812 }
1813 }
1814 return 0;
1815 }
1816 #endif /* LIBXML_ICONV_ENABLED */
1817
1818 /************************************************************************
1819 * *
1820 * ICU based generic conversion functions *
1821 * *
1822 ************************************************************************/
1823
1824 #ifdef LIBXML_ICU_ENABLED
1825 /**
1826 * xmlUconvWrapper:
1827 * @cd: ICU uconverter data structure
1828 * @toUnicode : non-zero if toUnicode. 0 otherwise.
1829 * @out: a pointer to an array of bytes to store the result
1830 * @outlen: the length of @out
1831 * @in: a pointer to an array of ISO Latin 1 chars
1832 * @inlen: the length of @in
1833 *
1834 * Returns 0 if success, or
1835 * -1 by lack of space, or
1836 * -2 if the transcoding fails (for *in is not valid utf8 string or
1837 * the result of transformation can't fit into the encoding we want), or
1838 * -3 if there the last byte can't form a single output char.
1839 *
1840 * The value of @inlen after return is the number of octets consumed
1841 * as the return value is positive, else unpredictable.
1842 * The value of @outlen after return is the number of ocetes consumed.
1843 */
1844 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1845 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1846 const unsigned char *in, int *inlen) {
1847 const char *ucv_in = (const char *) in;
1848 char *ucv_out = (char *) out;
1849 UErrorCode err = U_ZERO_ERROR;
1850
1851 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1852 if (outlen != NULL) *outlen = 0;
1853 return(-1);
1854 }
1855
1856 /*
1857 * TODO(jungshik)
1858 * 1. is ucnv_convert(To|From)Algorithmic better?
1859 * 2. had we better use an explicit pivot buffer?
1860 * 3. error returned comes from 'fromUnicode' only even
1861 * when toUnicode is true !
1862 */
1863 if (toUnicode) {
1864 /* encoding => UTF-16 => UTF-8 */
1865 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1866 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1867 0, TRUE, &err);
1868 } else {
1869 /* UTF-8 => UTF-16 => encoding */
1870 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1871 &ucv_in, ucv_in + *inlen, NULL, NULL, NULL, NULL,
1872 0, TRUE, &err);
1873 }
1874 *inlen = ucv_in - (const char*) in;
1875 *outlen = ucv_out - (char *) out;
1876 if (U_SUCCESS(err))
1877 return 0;
1878 if (err == U_BUFFER_OVERFLOW_ERROR)
1879 return -1;
1880 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1881 return -2;
1882 /* if (err == U_TRUNCATED_CHAR_FOUND) */
1883 return -3;
1884 }
1885 #endif /* LIBXML_ICU_ENABLED */
1886
1887 /************************************************************************
1888 * *
1889 * The real API used by libxml for on-the-fly conversion *
1890 * *
1891 ************************************************************************/
1892
1893 /**
1894 * xmlCharEncFirstLine:
1895 * @handler: char enconding transformation data structure
1896 * @out: an xmlBuffer for the output.
1897 * @in: an xmlBuffer for the input
1898 *
1899 * Front-end for the encoding handler input function, but handle only
1900 * the very first line, i.e. limit itself to 45 chars.
1901 *
1902 * Returns the number of byte written if success, or
1903 * -1 general error
1904 * -2 if the transcoding fails (for *in is not valid utf8 string or
1905 * the result of transformation can't fit into the encoding we want), or
1906 */
1907 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)1908 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1909 xmlBufferPtr in) {
1910 int ret = -2;
1911 int written;
1912 int toconv;
1913
1914 if (handler == NULL) return(-1);
1915 if (out == NULL) return(-1);
1916 if (in == NULL) return(-1);
1917
1918 /* calculate space available */
1919 written = out->size - out->use;
1920 toconv = in->use;
1921 /*
1922 * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1923 * 45 chars should be sufficient to reach the end of the encoding
1924 * declaration without going too far inside the document content.
1925 * on UTF-16 this means 90bytes, on UCS4 this means 180
1926 */
1927 if (toconv > 180)
1928 toconv = 180;
1929 if (toconv * 2 >= written) {
1930 xmlBufferGrow(out, toconv);
1931 written = out->size - out->use - 1;
1932 }
1933
1934 if (handler->input != NULL) {
1935 ret = handler->input(&out->content[out->use], &written,
1936 in->content, &toconv);
1937 xmlBufferShrink(in, toconv);
1938 out->use += written;
1939 out->content[out->use] = 0;
1940 }
1941 #ifdef LIBXML_ICONV_ENABLED
1942 else if (handler->iconv_in != NULL) {
1943 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1944 &written, in->content, &toconv);
1945 xmlBufferShrink(in, toconv);
1946 out->use += written;
1947 out->content[out->use] = 0;
1948 if (ret == -1) ret = -3;
1949 }
1950 #endif /* LIBXML_ICONV_ENABLED */
1951 #ifdef LIBXML_ICU_ENABLED
1952 else if (handler->uconv_in != NULL) {
1953 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1954 &written, in->content, &toconv);
1955 xmlBufferShrink(in, toconv);
1956 out->use += written;
1957 out->content[out->use] = 0;
1958 if (ret == -1) ret = -3;
1959 }
1960 #endif /* LIBXML_ICU_ENABLED */
1961 #ifdef DEBUG_ENCODING
1962 switch (ret) {
1963 case 0:
1964 xmlGenericError(xmlGenericErrorContext,
1965 "converted %d bytes to %d bytes of input\n",
1966 toconv, written);
1967 break;
1968 case -1:
1969 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1970 toconv, written, in->use);
1971 break;
1972 case -2:
1973 xmlGenericError(xmlGenericErrorContext,
1974 "input conversion failed due to input error\n");
1975 break;
1976 case -3:
1977 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1978 toconv, written, in->use);
1979 break;
1980 default:
1981 xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1982 }
1983 #endif /* DEBUG_ENCODING */
1984 /*
1985 * Ignore when input buffer is not on a boundary
1986 */
1987 if (ret == -3) ret = 0;
1988 if (ret == -1) ret = 0;
1989 return(ret);
1990 }
1991
1992 /**
1993 * xmlCharEncInFunc:
1994 * @handler: char encoding transformation data structure
1995 * @out: an xmlBuffer for the output.
1996 * @in: an xmlBuffer for the input
1997 *
1998 * Generic front-end for the encoding handler input function
1999 *
2000 * Returns the number of byte written if success, or
2001 * -1 general error
2002 * -2 if the transcoding fails (for *in is not valid utf8 string or
2003 * the result of transformation can't fit into the encoding we want), or
2004 */
2005 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2006 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2007 xmlBufferPtr in)
2008 {
2009 int ret = -2;
2010 int written;
2011 int toconv;
2012
2013 if (handler == NULL)
2014 return (-1);
2015 if (out == NULL)
2016 return (-1);
2017 if (in == NULL)
2018 return (-1);
2019
2020 toconv = in->use;
2021 if (toconv == 0)
2022 return (0);
2023 written = out->size - out->use;
2024 if (toconv * 2 >= written) {
2025 xmlBufferGrow(out, out->size + toconv * 2);
2026 written = out->size - out->use - 1;
2027 }
2028 if (handler->input != NULL) {
2029 ret = handler->input(&out->content[out->use], &written,
2030 in->content, &toconv);
2031 xmlBufferShrink(in, toconv);
2032 out->use += written;
2033 out->content[out->use] = 0;
2034 }
2035 #ifdef LIBXML_ICONV_ENABLED
2036 else if (handler->iconv_in != NULL) {
2037 ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2038 &written, in->content, &toconv);
2039 xmlBufferShrink(in, toconv);
2040 out->use += written;
2041 out->content[out->use] = 0;
2042 if (ret == -1)
2043 ret = -3;
2044 }
2045 #endif /* LIBXML_ICONV_ENABLED */
2046 #ifdef LIBXML_ICU_ENABLED
2047 else if (handler->uconv_in != NULL) {
2048 ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2049 &written, in->content, &toconv);
2050 xmlBufferShrink(in, toconv);
2051 out->use += written;
2052 out->content[out->use] = 0;
2053 if (ret == -1)
2054 ret = -3;
2055 }
2056 #endif /* LIBXML_ICU_ENABLED */
2057 switch (ret) {
2058 case 0:
2059 #ifdef DEBUG_ENCODING
2060 xmlGenericError(xmlGenericErrorContext,
2061 "converted %d bytes to %d bytes of input\n",
2062 toconv, written);
2063 #endif
2064 break;
2065 case -1:
2066 #ifdef DEBUG_ENCODING
2067 xmlGenericError(xmlGenericErrorContext,
2068 "converted %d bytes to %d bytes of input, %d left\n",
2069 toconv, written, in->use);
2070 #endif
2071 break;
2072 case -3:
2073 #ifdef DEBUG_ENCODING
2074 xmlGenericError(xmlGenericErrorContext,
2075 "converted %d bytes to %d bytes of input, %d left\n",
2076 toconv, written, in->use);
2077 #endif
2078 break;
2079 case -2: {
2080 char buf[50];
2081
2082 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2083 in->content[0], in->content[1],
2084 in->content[2], in->content[3]);
2085 buf[49] = 0;
2086 xmlEncodingErr(XML_I18N_CONV_FAILED,
2087 "input conversion failed due to input error, bytes %s\n",
2088 buf);
2089 }
2090 }
2091 /*
2092 * Ignore when input buffer is not on a boundary
2093 */
2094 if (ret == -3)
2095 ret = 0;
2096 return (written? written : ret);
2097 }
2098
2099 /**
2100 * xmlCharEncOutFunc:
2101 * @handler: char enconding transformation data structure
2102 * @out: an xmlBuffer for the output.
2103 * @in: an xmlBuffer for the input
2104 *
2105 * Generic front-end for the encoding handler output function
2106 * a first call with @in == NULL has to be made firs to initiate the
2107 * output in case of non-stateless encoding needing to initiate their
2108 * state or the output (like the BOM in UTF16).
2109 * In case of UTF8 sequence conversion errors for the given encoder,
2110 * the content will be automatically remapped to a CharRef sequence.
2111 *
2112 * Returns the number of byte written if success, or
2113 * -1 general error
2114 * -2 if the transcoding fails (for *in is not valid utf8 string or
2115 * the result of transformation can't fit into the encoding we want), or
2116 */
2117 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2118 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2119 xmlBufferPtr in) {
2120 int ret = -2;
2121 int written;
2122 int writtentot = 0;
2123 int toconv;
2124 int output = 0;
2125
2126 if (handler == NULL) return(-1);
2127 if (out == NULL) return(-1);
2128
2129 retry:
2130
2131 written = out->size - out->use;
2132
2133 if (written > 0)
2134 written--; /* Gennady: count '/0' */
2135
2136 /*
2137 * First specific handling of in = NULL, i.e. the initialization call
2138 */
2139 if (in == NULL) {
2140 toconv = 0;
2141 if (handler->output != NULL) {
2142 ret = handler->output(&out->content[out->use], &written,
2143 NULL, &toconv);
2144 if (ret >= 0) { /* Gennady: check return value */
2145 out->use += written;
2146 out->content[out->use] = 0;
2147 }
2148 }
2149 #ifdef LIBXML_ICONV_ENABLED
2150 else if (handler->iconv_out != NULL) {
2151 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2152 &written, NULL, &toconv);
2153 out->use += written;
2154 out->content[out->use] = 0;
2155 }
2156 #endif /* LIBXML_ICONV_ENABLED */
2157 #ifdef LIBXML_ICU_ENABLED
2158 else if (handler->uconv_out != NULL) {
2159 ret = xmlUconvWrapper(handler->uconv_out, 0,
2160 &out->content[out->use],
2161 &written, NULL, &toconv);
2162 out->use += written;
2163 out->content[out->use] = 0;
2164 }
2165 #endif /* LIBXML_ICU_ENABLED */
2166 #ifdef DEBUG_ENCODING
2167 xmlGenericError(xmlGenericErrorContext,
2168 "initialized encoder\n");
2169 #endif
2170 return(0);
2171 }
2172
2173 /*
2174 * Conversion itself.
2175 */
2176 toconv = in->use;
2177 if (toconv == 0)
2178 return(0);
2179 if (toconv * 4 >= written) {
2180 xmlBufferGrow(out, toconv * 4);
2181 written = out->size - out->use - 1;
2182 }
2183 if (handler->output != NULL) {
2184 ret = handler->output(&out->content[out->use], &written,
2185 in->content, &toconv);
2186 if (written > 0) {
2187 xmlBufferShrink(in, toconv);
2188 out->use += written;
2189 writtentot += written;
2190 }
2191 out->content[out->use] = 0;
2192 }
2193 #ifdef LIBXML_ICONV_ENABLED
2194 else if (handler->iconv_out != NULL) {
2195 ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2196 &written, in->content, &toconv);
2197 xmlBufferShrink(in, toconv);
2198 out->use += written;
2199 writtentot += written;
2200 out->content[out->use] = 0;
2201 if (ret == -1) {
2202 if (written > 0) {
2203 /*
2204 * Can be a limitation of iconv
2205 */
2206 goto retry;
2207 }
2208 ret = -3;
2209 }
2210 }
2211 #endif /* LIBXML_ICONV_ENABLED */
2212 #ifdef LIBXML_ICU_ENABLED
2213 else if (handler->uconv_out != NULL) {
2214 ret = xmlUconvWrapper(handler->uconv_out, 0,
2215 &out->content[out->use],
2216 &written, in->content, &toconv);
2217 xmlBufferShrink(in, toconv);
2218 out->use += written;
2219 writtentot += written;
2220 out->content[out->use] = 0;
2221 if (ret == -1) {
2222 if (written > 0) {
2223 /*
2224 * Can be a limitation of iconv
2225 */
2226 goto retry;
2227 }
2228 ret = -3;
2229 }
2230 }
2231 #endif /* LIBXML_ICU_ENABLED */
2232 else {
2233 xmlEncodingErr(XML_I18N_NO_OUTPUT,
2234 "xmlCharEncOutFunc: no output function !\n", NULL);
2235 return(-1);
2236 }
2237
2238 if (ret >= 0) output += ret;
2239
2240 /*
2241 * Attempt to handle error cases
2242 */
2243 switch (ret) {
2244 case 0:
2245 #ifdef DEBUG_ENCODING
2246 xmlGenericError(xmlGenericErrorContext,
2247 "converted %d bytes to %d bytes of output\n",
2248 toconv, written);
2249 #endif
2250 break;
2251 case -1:
2252 #ifdef DEBUG_ENCODING
2253 xmlGenericError(xmlGenericErrorContext,
2254 "output conversion failed by lack of space\n");
2255 #endif
2256 break;
2257 case -3:
2258 #ifdef DEBUG_ENCODING
2259 xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2260 toconv, written, in->use);
2261 #endif
2262 break;
2263 case -2: {
2264 int len = in->use;
2265 const xmlChar *utf = (const xmlChar *) in->content;
2266 int cur;
2267
2268 cur = xmlGetUTF8Char(utf, &len);
2269 if (cur > 0) {
2270 xmlChar charref[20];
2271
2272 #ifdef DEBUG_ENCODING
2273 xmlGenericError(xmlGenericErrorContext,
2274 "handling output conversion error\n");
2275 xmlGenericError(xmlGenericErrorContext,
2276 "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2277 in->content[0], in->content[1],
2278 in->content[2], in->content[3]);
2279 #endif
2280 /*
2281 * Removes the UTF8 sequence, and replace it by a charref
2282 * and continue the transcoding phase, hoping the error
2283 * did not mangle the encoder state.
2284 */
2285 snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
2286 xmlBufferShrink(in, len);
2287 xmlBufferAddHead(in, charref, -1);
2288
2289 goto retry;
2290 } else {
2291 char buf[50];
2292
2293 snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2294 in->content[0], in->content[1],
2295 in->content[2], in->content[3]);
2296 buf[49] = 0;
2297 xmlEncodingErr(XML_I18N_CONV_FAILED,
2298 "output conversion failed due to conv error, bytes %s\n",
2299 buf);
2300 if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2301 in->content[0] = ' ';
2302 }
2303 break;
2304 }
2305 }
2306 return(ret);
2307 }
2308
2309 /**
2310 * xmlCharEncCloseFunc:
2311 * @handler: char enconding transformation data structure
2312 *
2313 * Generic front-end for encoding handler close function
2314 *
2315 * Returns 0 if success, or -1 in case of error
2316 */
2317 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2318 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2319 int ret = 0;
2320 if (handler == NULL) return(-1);
2321 if (handler->name == NULL) return(-1);
2322 #ifdef LIBXML_ICONV_ENABLED
2323 /*
2324 * Iconv handlers can be used only once, free the whole block.
2325 * and the associated icon resources.
2326 */
2327 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2328 if (handler->name != NULL)
2329 xmlFree(handler->name);
2330 handler->name = NULL;
2331 if (handler->iconv_out != NULL) {
2332 if (iconv_close(handler->iconv_out))
2333 ret = -1;
2334 handler->iconv_out = NULL;
2335 }
2336 if (handler->iconv_in != NULL) {
2337 if (iconv_close(handler->iconv_in))
2338 ret = -1;
2339 handler->iconv_in = NULL;
2340 }
2341 xmlFree(handler);
2342 }
2343 #endif /* LIBXML_ICONV_ENABLED */
2344 #ifdef LIBXML_ICU_ENABLED
2345 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2346 if (handler->name != NULL)
2347 xmlFree(handler->name);
2348 handler->name = NULL;
2349 if (handler->uconv_out != NULL) {
2350 closeIcuConverter(handler->uconv_out);
2351 handler->uconv_out = NULL;
2352 }
2353 if (handler->uconv_in != NULL) {
2354 closeIcuConverter(handler->uconv_in);
2355 handler->uconv_in = NULL;
2356 }
2357 xmlFree(handler);
2358 }
2359 #endif
2360 #ifdef DEBUG_ENCODING
2361 if (ret)
2362 xmlGenericError(xmlGenericErrorContext,
2363 "failed to close the encoding handler\n");
2364 else
2365 xmlGenericError(xmlGenericErrorContext,
2366 "closed the encoding handler\n");
2367 #endif
2368
2369 return(ret);
2370 }
2371
2372 /**
2373 * xmlByteConsumed:
2374 * @ctxt: an XML parser context
2375 *
2376 * This function provides the current index of the parser relative
2377 * to the start of the current entity. This function is computed in
2378 * bytes from the beginning starting at zero and finishing at the
2379 * size in byte of the file if parsing a file. The function is
2380 * of constant cost if the input is UTF-8 but can be costly if run
2381 * on non-UTF-8 input.
2382 *
2383 * Returns the index in bytes from the beginning of the entity or -1
2384 * in case the index could not be computed.
2385 */
2386 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2387 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2388 xmlParserInputPtr in;
2389
2390 if (ctxt == NULL) return(-1);
2391 in = ctxt->input;
2392 if (in == NULL) return(-1);
2393 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2394 unsigned int unused = 0;
2395 xmlCharEncodingHandler * handler = in->buf->encoder;
2396 /*
2397 * Encoding conversion, compute the number of unused original
2398 * bytes from the input not consumed and substract that from
2399 * the raw consumed value, this is not a cheap operation
2400 */
2401 if (in->end - in->cur > 0) {
2402 unsigned char convbuf[32000];
2403 const unsigned char *cur = (const unsigned char *)in->cur;
2404 int toconv = in->end - in->cur, written = 32000;
2405
2406 int ret;
2407
2408 if (handler->output != NULL) {
2409 do {
2410 toconv = in->end - cur;
2411 written = 32000;
2412 ret = handler->output(&convbuf[0], &written,
2413 cur, &toconv);
2414 if (ret == -1) return(-1);
2415 unused += written;
2416 cur += toconv;
2417 } while (ret == -2);
2418 #ifdef LIBXML_ICONV_ENABLED
2419 } else if (handler->iconv_out != NULL) {
2420 do {
2421 toconv = in->end - cur;
2422 written = 32000;
2423 ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2424 &written, cur, &toconv);
2425 if (ret < 0) {
2426 if (written > 0)
2427 ret = -2;
2428 else
2429 return(-1);
2430 }
2431 unused += written;
2432 cur += toconv;
2433 } while (ret == -2);
2434 #endif
2435 #ifdef LIBXML_ICU_ENABLED
2436 } else if (handler->uconv_out != NULL) {
2437 do {
2438 toconv = in->end - cur;
2439 written = 32000;
2440 ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2441 &written, cur, &toconv);
2442 if (ret < 0) {
2443 if (written > 0)
2444 ret = -2;
2445 else
2446 return(-1);
2447 }
2448 unused += written;
2449 cur += toconv;
2450 } while (ret == -2);
2451 } else {
2452 /* could not find a converter */
2453 return(-1);
2454 }
2455 }
2456 if (in->buf->rawconsumed < unused)
2457 return(-1);
2458 return(in->buf->rawconsumed - unused);
2459 }
2460 return(in->consumed + (in->cur - in->base));
2461 }
2462 #endif
2463
2464 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2465 #ifdef LIBXML_ISO8859X_ENABLED
2466
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Conversion stops early, without error, when @out
 * is full.
 *
 * @xlattable starts with 48 index bytes (32 entries selected by the low
 * bits of a two-byte lead byte, then 16 entries selected by the low bits
 * of a three-byte lead byte) followed by 64-entry blocks; a 0 found in
 * the final block means the character is not in the target set.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (invalid or truncated UTF-8, or character not representable),
 *     or -1 otherwise (invalid arguments).
 * The value of @inlen after return is the number of octets consumed;
 *     on a -2 return it stops in front of the offending sequence.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    /* end of the last fully converted character */
    const unsigned char* processed = in;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
	* initialization nothing to do
	*/
	*outlen = 0;
	*inlen = 0;
	return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    /* Every iteration emits at most one byte, so one check is enough. */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;
        if (d < 0x80)  {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto failed;
        } else if (d < 0xE0) {
            unsigned char c;
            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                goto failed;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                goto failed;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;
            if (!(in < inend - 1)) {
                /* trailing bytes not in input buffer */
                goto failed;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                goto failed;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                goto failed;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
	    d = d & 0x0F;
	    d = xlattable [48 + c2 + xlattable [48 + c1 +
			xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                goto failed;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            goto failed;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

failed:
    /*
     * Report only whole characters as consumed, whatever number of
     * bytes of the offending sequence had already been read.
     */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
2583
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 * @unicodetable: code points for the bytes 0x80..0xFF, indexed by
 *                (byte - 0x80); 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when the
 * next character does not fit in @out.
 *
 * Returns the number of bytes written if success, or -1 otherwise
 *     (invalid arguments or undefined code point in the input)
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    /* A byte is only read once it is known to lie inside the input. */
    while (in < inend) {
        c = *in;
        if (c < 0x80) {
            if (out >= outend)
                break;
            *out++ = c;
        } else {
            /* bytes >= 0x80 need at least two output bytes */
            if (outend - out < 2)
                break;
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                /* three-byte form: make sure the third byte fits too */
                if (outend - out < 3)
                    break;
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2650
2651
2652 /************************************************************************
2653 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2654 ************************************************************************/
2655
/*
 * Code point table with 128 entries, one per byte value 0x80..0xFF
 * (entry index = byte - 0x80), the layout ISO8859xToUTF8 reads through
 * its unicodetable argument. Presumably the ISO-8859-2 table handed to
 * that function; the call site is outside this part of the file.
 */
2656 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2657 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2658 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2659 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2660 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2661 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2662 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2663 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2664 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2665 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2666 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2667 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2668 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2669 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2670 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2671 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2672 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2673 };
2674
/*
 * Two-level lookup table in the layout UTF8ToISO8859x reads through its
 * xlattable argument: 48 index bytes followed by 6 blocks of 64 entries,
 * where a 0 entry in the final block means "no such character".
 * Presumably the ISO-8859-2 table handed to that function; the call
 * site is outside this part of the file.
 */
2675 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2676 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2683 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2684 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2685 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2686 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2687 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2688 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2689 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2690 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2691 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2692 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2693 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2696 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2697 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2698 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2699 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2700 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2701 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2702 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2703 };
2704
/*
 * Code point table with 128 entries, one per byte value 0x80..0xFF
 * (entry index = byte - 0x80), the layout ISO8859xToUTF8 reads through
 * its unicodetable argument; that function treats a 0x0000 entry as an
 * undefined byte. Presumably the ISO-8859-3 table handed to that
 * function; the call site is outside this part of the file.
 */
2705 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2706 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2707 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2708 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2709 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2710 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2711 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2712 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2713 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2714 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2715 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2716 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2717 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2718 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2719 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2720 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2721 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2722 };
2723
/*
 * Two-level lookup table in the layout UTF8ToISO8859x reads through its
 * xlattable argument: 48 index bytes followed by 7 blocks of 64 entries,
 * where a 0 entry in the final block means "no such character".
 * Presumably the ISO-8859-3 table handed to that function; the call
 * site is outside this part of the file.
 */
2724 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2725 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2726 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2729 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2730 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2732 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2733 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2734 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2735 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2736 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2737 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2738 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2739 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2742 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2743 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2744 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2745 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2746 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2747 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2750 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2751 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2752 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2753 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2754 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2755 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2756 };
2757
/*
 * Code point table with 128 entries, one per byte value 0x80..0xFF
 * (entry index = byte - 0x80), the layout ISO8859xToUTF8 reads through
 * its unicodetable argument. Presumably the ISO-8859-4 table handed to
 * that function; the call site is outside this part of the file.
 */
2758 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2759 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2760 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2761 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2762 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2763 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2764 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2765 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2766 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2767 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2768 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2769 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2770 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2771 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2772 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2773 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2774 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2775 };
2776
/*
 * Two-level lookup table in the layout UTF8ToISO8859x reads through its
 * xlattable argument: 48 index bytes followed by 6 blocks of 64 entries,
 * where a 0 entry in the final block means "no such character".
 * Presumably the ISO-8859-4 table handed to that function; the call
 * site is outside this part of the file.
 */
2777 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2778 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2779 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2781 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2782 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2783 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2784 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2785 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2786 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2787 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2788 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2789 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2790 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2791 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2792 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2793 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2794 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2795 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2796 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2797 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2802 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2803 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2804 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2805 };
2806
/*
 * Code point table with 128 entries, one per byte value 0x80..0xFF
 * (entry index = byte - 0x80), the layout ISO8859xToUTF8 reads through
 * its unicodetable argument. Presumably the ISO-8859-5 table handed to
 * that function; the call site is outside this part of the file.
 */
2807 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2808 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2809 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2810 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2811 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2812 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2813 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2814 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2815 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2816 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2817 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2818 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2819 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2820 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2821 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2822 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2823 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2824 };
2825
/*
 * Two-level lookup table in the layout UTF8ToISO8859x reads through its
 * xlattable argument: 48 index bytes followed by 6 blocks of 64 entries,
 * where a 0 entry in the final block means "no such character".
 * Presumably the ISO-8859-5 table handed to that function; the call
 * site is outside this part of the file.
 */
2826 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2827 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2833 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2835 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2836 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2839 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2840 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2841 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2842 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2843 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2844 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2845 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2846 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2847 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 };
2855
/*
 * Code point table with 128 entries, one per byte value 0x80..0xFF
 * (entry index = byte - 0x80), the layout ISO8859xToUTF8 reads through
 * its unicodetable argument; that function treats a 0x0000 entry as an
 * undefined byte. Presumably the ISO-8859-6 table handed to that
 * function; the call site is outside this part of the file.
 */
2856 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2857 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2858 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2859 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2860 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2861 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2862 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2863 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2864 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2865 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2866 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2867 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2868 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2869 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2870 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2871 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2872 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2873 };
2874
/*
 * Two-level lookup table in the layout UTF8ToISO8859x reads through its
 * xlattable argument: 48 index bytes followed by 5 blocks of 64 entries,
 * where a 0 entry in the final block means "no such character".
 * Presumably the ISO-8859-6 table handed to that function; the call
 * site is outside this part of the file.
 */
2875 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2876 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2878 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2883 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2884 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2885 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2886 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2893 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2894 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2895 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2896 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 };
2900
/*
 * xmlunicodetable_ISO8859_7:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-7. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * 0x0000 fills the slots for which no code point is listed.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper; how that helper treats 0x0000 is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
2919
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-7 byte). Per the declared
 * size it holds 48 leading index bytes followed by 7 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
2953
/*
 * xmlunicodetable_ISO8859_8:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-8. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * 0x0000 fills the slots for which no code point is listed.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper; how that helper treats 0x0000 is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
2972
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-8 byte). Per the declared
 * size it holds 48 leading index bytes followed by 7 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3006
/*
 * xmlunicodetable_ISO8859_9:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-9. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3025
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-9 byte). Per the declared
 * size it holds 48 leading index bytes followed by 5 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3051
/*
 * xmlunicodetable_ISO8859_10:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-10. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3070
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-10 byte). Per the declared
 * size it holds 48 leading index bytes followed by 7 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3104
/*
 * xmlunicodetable_ISO8859_11:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-11. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * 0x0000 fills the slots for which no code point is listed.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper; how that helper treats 0x0000 is not visible here.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3123
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-11 byte). Per the declared
 * size it holds 48 leading index bytes followed by 6 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3153
/*
 * xmlunicodetable_ISO8859_13:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-13. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3172
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-13 byte). Per the declared
 * size it holds 48 leading index bytes followed by 7 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3206
/*
 * xmlunicodetable_ISO8859_14:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-14. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3225
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-14 byte). Per the declared
 * size it holds 48 leading index bytes followed by 10 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3271
/*
 * xmlunicodetable_ISO8859_15:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-15. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3290
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-15 byte). Per the declared
 * size it holds 48 leading index bytes followed by 6 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3320
/*
 * xmlunicodetable_ISO8859_16:
 *
 * Unicode code points for the 128 high bytes of ISO-8859-16. The first
 * entry is 0x0080, so entry i lines up with byte 0x80 + i.
 * Every slot in this table carries a non-zero code point.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 through a
 * per-charset wrapper defined outside this chunk.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3339
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Packed reverse-lookup data (Unicode -> ISO-8859-16 byte). Per the declared
 * size it holds 48 leading index bytes followed by 9 pages of 64 bytes.
 * NOTE(review): the walk through index bytes and pages is implemented by
 * UTF8ToISO8859x, outside this chunk; confirm there before hand-editing.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3381
3382
3383 /*
3384 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3385 */
3386
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3387 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3388 const unsigned char* in, int *inlen) {
3389 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3390 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3391 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3392 const unsigned char* in, int *inlen) {
3393 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3394 }
3395
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3396 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3397 const unsigned char* in, int *inlen) {
3398 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3399 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3400 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3401 const unsigned char* in, int *inlen) {
3402 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3403 }
3404
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3405 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3406 const unsigned char* in, int *inlen) {
3407 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3408 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3409 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3410 const unsigned char* in, int *inlen) {
3411 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3412 }
3413
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3414 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3415 const unsigned char* in, int *inlen) {
3416 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3417 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3418 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3419 const unsigned char* in, int *inlen) {
3420 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3421 }
3422
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3423 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3424 const unsigned char* in, int *inlen) {
3425 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3426 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3427 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3428 const unsigned char* in, int *inlen) {
3429 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3430 }
3431
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3432 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3433 const unsigned char* in, int *inlen) {
3434 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3435 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3436 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3437 const unsigned char* in, int *inlen) {
3438 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3439 }
3440
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3441 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3442 const unsigned char* in, int *inlen) {
3443 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3444 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3445 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3446 const unsigned char* in, int *inlen) {
3447 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3448 }
3449
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3450 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3451 const unsigned char* in, int *inlen) {
3452 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3453 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3454 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3455 const unsigned char* in, int *inlen) {
3456 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3457 }
3458
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3459 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3460 const unsigned char* in, int *inlen) {
3461 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3462 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3463 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3464 const unsigned char* in, int *inlen) {
3465 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3466 }
3467
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3468 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3469 const unsigned char* in, int *inlen) {
3470 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3471 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3472 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3473 const unsigned char* in, int *inlen) {
3474 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3475 }
3476
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3477 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3478 const unsigned char* in, int *inlen) {
3479 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3480 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3481 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3482 const unsigned char* in, int *inlen) {
3483 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3484 }
3485
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3486 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3487 const unsigned char* in, int *inlen) {
3488 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3489 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3490 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3491 const unsigned char* in, int *inlen) {
3492 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3493 }
3494
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3495 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3496 const unsigned char* in, int *inlen) {
3497 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3498 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3499 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3500 const unsigned char* in, int *inlen) {
3501 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3502 }
3503
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3504 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3505 const unsigned char* in, int *inlen) {
3506 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3507 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3508 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3509 const unsigned char* in, int *inlen) {
3510 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3511 }
3512
3513 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3514 xmlRegisterCharEncodingHandlersISO8859x (void) {
3515 xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3516 xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3517 xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3518 xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3519 xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3520 xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3521 xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3522 xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3523 xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3524 xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3525 xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3526 xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3527 xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3528 xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3529 }
3530
3531 #endif
3532 #endif
3533
3534 #define bottom_encoding
3535 #include "elfgcchack.h"
3536