• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * encoding.c : implements the encoding conversion functions needed for XML
3  *
4  * Related specs:
5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8  * [ISO-8859-1]   ISO Latin-1 characters codes.
9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10  *                Worldwide Character Encoding -- Version 1.0", Addison-
11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12  *                described in Unicode Technical Report #4.
13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14  *                Information Interchange, ANSI X3.4-1986.
15  *
16  * See Copyright for the status of this software.
17  *
18  * daniel@veillard.com
19  *
20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21  */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 #include <limits.h>
28 
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47 
48 #include "buf.h"
49 #include "enc.h"
50 
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53 
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
57     const char *name;
58     const char *alias;
59 };
60 
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
64 
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74 
75 static int xmlLittleEndian = 1;
76 
77 /**
78  * xmlEncodingErrMemory:
79  * @extra:  extra information
80  *
81  * Handle an out of memory condition
82  */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88 
89 /**
90  * xmlErrEncoding:
91  * @error:  the error number
92  * @msg:  the error message
93  *
94  * n encoding error
95  */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100                     XML_FROM_I18N, error, XML_ERR_FATAL,
101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103 
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108   UErrorCode status = U_ZERO_ERROR;
109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110   if (conv == NULL)
111     return NULL;
112 
113   conv->pivot_source = conv->pivot_buf;
114   conv->pivot_target = conv->pivot_buf;
115 
116   conv->uconv = ucnv_open(name, &status);
117   if (U_FAILURE(status))
118     goto error;
119 
120   status = U_ZERO_ERROR;
121   if (toUnicode) {
122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123                         NULL, NULL, NULL, &status);
124   }
125   else {
126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127                         NULL, NULL, NULL, &status);
128   }
129   if (U_FAILURE(status))
130     goto error;
131 
132   status = U_ZERO_ERROR;
133   conv->utf8 = ucnv_open("UTF-8", &status);
134   if (U_SUCCESS(status))
135     return conv;
136 
137 error:
138   if (conv->uconv)
139     ucnv_close(conv->uconv);
140   xmlFree(conv);
141   return NULL;
142 }
143 
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147   if (conv != NULL) {
148     ucnv_close(conv->uconv);
149     ucnv_close(conv->utf8);
150     xmlFree(conv);
151   }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154 
155 /************************************************************************
156  *									*
157  *		Conversions To/From UTF8 encoding			*
158  *									*
159  ************************************************************************/
160 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. Every ASCII byte maps to exactly one output byte,
 * so the conversion proceeds until either buffer is exhausted.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or a byte outside the ASCII range (>= 0x80) is met.
 * The value of @inlen after return is the number of octets consumed
 *     (on error, the octets preceding the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * One byte in, one byte out: only the real end of the output buffer
     * limits the copy, no extra headroom is required.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not an ASCII byte: report what was converted so far */
            *outlen = out - outstart;
            *inlen = in - base;
            return(-1);
        }
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
205 
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. A NULL @in is an initialization call and
 * produces no output.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (malformed UTF-8 or a character outside ASCII), or -1 if
 *     @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;  /* no chance for this in Ascii */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /*
             * A byte that is not a continuation byte makes the sequence
             * malformed; fail instead of emitting a half decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto invalid;                  /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, the others become a
 * two byte sequence; conversion stops at the first character that does
 * not fit in the remaining output space.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src;
    const unsigned char *srcend;
    unsigned char *dst;
    unsigned char *dstend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    src = in;
    srcend = in + (*inlen);
    dst = out;
    dstend = out + *outlen;

    while (src < srcend) {
        unsigned char ch = *src;

        if (ch < 0x80) {
            /* plain ASCII: one byte needed */
            if (dst >= dstend)
                break;
            *dst++ = ch;
        } else {
            /* 0x80..0xFF: lead byte plus one continuation byte */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
        src++;
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
340 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: as many bytes as fit in both
 * buffers are copied unchanged. A NULL @inb is an initialization call.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or the number of bytes to copy is negative.
 *     The values of *outlen and *inlenb after return are both the
 *     number of octets copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    /* copy no more than the smaller of the two buffers */
    count = (*outlen > *inlenb) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(*outlen);
}
387 
388 
389 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. A NULL @in is an initialization call and produces
 * no output.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails (invalid lead or continuation byte, or a character above
 *     0xFF), or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *cur;      /* start of the next unconverted char */
    const unsigned char *inend;
    unsigned char *dst;
    unsigned char *outend;
    int invalid = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    cur = in;
    inend = in + (*inlen);
    dst = out;
    outend = out + (*outlen);

    while (cur < inend) {
        const unsigned char *next = cur;
        unsigned int lead = *next++;
        unsigned int code;
        int extra;

        if (lead < 0x80) {
            code = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            /* trailing byte in leading position */
            invalid = 1;
            break;
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            extra = 3;
        } else {
            /* no chance for this in IsoLat1 */
            invalid = 1;
            break;
        }

        /* sequence cut by the end of the input: wait for more data */
        if (inend - next < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *next++;

            if ((cont & 0xC0) != 0x80) {
                invalid = 1;
                break;
            }
            code = (code << 6) | (cont & 0x3F);
            extra--;
        }
        if (invalid)
            break;

        /* assertion: code is a single UCS-4 value */
        if (code > 0xFF) {
            /* no chance for this in IsoLat1 */
            invalid = 1;
            break;
        }
        if (dst >= outend)
            break;
        *dst++ = code;
        cur = next;
    }

    *outlen = dst - out;
    *inlen = cur - in;
    if (invalid)
        return(-2);
    return(*outlen);
}
479 #endif /* LIBXML_OUTPUT_ENABLED */
480 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the host.
 *
 * A character is only converted while more than 5 bytes remain free in
 * @out. A trailing odd byte and a high surrogate whose partner has not
 * arrived yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 *     The value of *inlenb after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The headroom test guarantees room for the longest (4 byte) UTF-8
     * sequence, so the writes below need no further bound checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++ =  c;                         bits = -6; }
        else if (c <   0x800) {  *out++ = ((c >>  6) & 0x1F) | 0xC0;  bits =  0; }
        else if (c < 0x10000) {  *out++ = ((c >> 12) & 0x0F) | 0xE0;  bits =  6; }
        else                  {  *out++ = ((c >> 18) & 0x07) | 0xF0;  bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
573 
574 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is written one byte at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. A NULL @in is an initialization call and
 * produces no output (UTF-16LE has no byte order mark).
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (invalid lead or
 *     continuation byte, or a value of 0x110000 or more).
 *     The value of *inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte makes the input malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
683 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A NULL @in is the initialization call: the
 * little endian Byte Order Mark (0xFF 0xFE) is written if @outb has
 * room for it. Any other input is handed to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written (2 or 0 for the initialization
 *     call), otherwise whatever UTF8ToUTF16LE() returns.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen < 2) {
        /* no room for the mark */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    outb[0] = 0xFF;
    outb[1] = 0xFE;
    *outlen = 2;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    xmlGenericError(xmlGenericErrorContext,
            "Added FFFE Byte Order Mark\n");
#endif
    return(2);
}
722 #endif /* LIBXML_OUTPUT_ENABLED */
723 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read one byte at a time, so
 * @inb needs no particular alignment and the result does not depend on
 * the byte order of the host.
 *
 * A character is only converted while more than 5 bytes remain free in
 * @out. A trailing odd byte and a high surrogate whose partner has not
 * arrived yet are left unconsumed.
 *
 * Returns the number of bytes written, or -2 if the transcoding fails
 *     (a high surrogate is not followed by a low surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    /* only whole 16-bit units are looked at */
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = inb + *inlenb;

    /*
     * The headroom test guarantees room for the longest (4 byte) UTF-8
     * sequence, so the writes below need no further bound checks.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
            c &= 0x03FF;
            c <<= 10;
            c |= d & 0x03FF;
            c += 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) {  *out++ =  c;                         bits = -6; }
        else if (c <   0x800) {  *out++ = ((c >>  6) & 0x1F) | 0xC0;  bits =  0; }
        else if (c < 0x10000) {  *out++ = ((c >> 12) & 0x0F) | 0xE0;  bits =  6; }
        else                  {  *out++ = ((c >> 18) & 0x07) | 0xF0;  bits = 12; }

        for ( ; bits >= 0; bits -= 6)
            *out++ = ((c >> bits) & 0x3F) | 0x80;

        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
816 
817 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is written one byte at a time, so
 * @outb needs no particular alignment and the result does not depend on
 * the byte order of the host. A NULL @in is an initialization call and
 * produces no output (UTF-16BE has no byte order mark).
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen
 *     is NULL, or -2 if the transcoding failed (invalid lead or
 *     continuation byte, or a value of 0x110000 or more).
 *     The value of *inlen after return is the number of octets consumed;
 *     an incomplete sequence at the end of @in is left unconsumed.
 *     The value of *outlen after return is the number of octets
 *     produced, on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    unsigned char* outend;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;  /* no chance for this in UTF-16 */

        /* sequence cut by the end of the input: wait for more data */
        if (inend - in < trailing)
            break;

        for ( ; trailing; trailing--) {
            d = *in++;
            /* anything but a continuation byte makes the input malformed */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair, i.e. two 16-bit units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the range UTF-16 can represent */
            goto invalid;
        }
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    /* out counts bytes, so this is a byte count like the success path */
    *outlen = out - outb;
    *inlen = processed - instart;
    return(-2);
}
923 #endif /* LIBXML_OUTPUT_ENABLED */
924 
925 /************************************************************************
926  *									*
927  *		Generic encoding handling routines			*
928  *									*
929  ************************************************************************/
930 
931 /**
932  * xmlDetectCharEncoding:
933  * @in:  a pointer to the first bytes of the XML entity, must be at least
934  *       2 bytes long (at least 4 if encoding is UTF4 variant).
935  * @len:  pointer to the length of the buffer
936  *
937  * Guess the encoding of the entity using the first bytes of the entity content
938  * according to the non-normative appendix F of the XML-1.0 recommendation.
939  *
940  * Returns one of the XML_CHAR_ENCODING_... values.
941  */
942 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)943 xmlDetectCharEncoding(const unsigned char* in, int len)
944 {
945     if (in == NULL)
946         return(XML_CHAR_ENCODING_NONE);
947     if (len >= 4) {
948 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
949 	    (in[2] == 0x00) && (in[3] == 0x3C))
950 	    return(XML_CHAR_ENCODING_UCS4BE);
951 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 	    (in[2] == 0x00) && (in[3] == 0x00))
953 	    return(XML_CHAR_ENCODING_UCS4LE);
954 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
955 	    (in[2] == 0x3C) && (in[3] == 0x00))
956 	    return(XML_CHAR_ENCODING_UCS4_2143);
957 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
958 	    (in[2] == 0x00) && (in[3] == 0x00))
959 	    return(XML_CHAR_ENCODING_UCS4_3412);
960 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
961 	    (in[2] == 0xA7) && (in[3] == 0x94))
962 	    return(XML_CHAR_ENCODING_EBCDIC);
963 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
964 	    (in[2] == 0x78) && (in[3] == 0x6D))
965 	    return(XML_CHAR_ENCODING_UTF8);
966 	/*
967 	 * Although not part of the recommendation, we also
968 	 * attempt an "auto-recognition" of UTF-16LE and
969 	 * UTF-16BE encodings.
970 	 */
971 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
972 	    (in[2] == 0x3F) && (in[3] == 0x00))
973 	    return(XML_CHAR_ENCODING_UTF16LE);
974 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
975 	    (in[2] == 0x00) && (in[3] == 0x3F))
976 	    return(XML_CHAR_ENCODING_UTF16BE);
977     }
978     if (len >= 3) {
979 	/*
980 	 * Errata on XML-1.0 June 20 2001
981 	 * We now allow an UTF8 encoded BOM
982 	 */
983 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
984 	    (in[2] == 0xBF))
985 	    return(XML_CHAR_ENCODING_UTF8);
986     }
987     /* For UTF-16 we can recognize by the BOM */
988     if (len >= 2) {
989 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
990 	    return(XML_CHAR_ENCODING_UTF16BE);
991 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
992 	    return(XML_CHAR_ENCODING_UTF16LE);
993     }
994     return(XML_CHAR_ENCODING_NONE);
995 }
996 
997 /**
998  * xmlCleanupEncodingAliases:
999  *
1000  * Unregisters all aliases
1001  */
1002 void
xmlCleanupEncodingAliases(void)1003 xmlCleanupEncodingAliases(void) {
1004     int i;
1005 
1006     if (xmlCharEncodingAliases == NULL)
1007 	return;
1008 
1009     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010 	if (xmlCharEncodingAliases[i].name != NULL)
1011 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1012 	if (xmlCharEncodingAliases[i].alias != NULL)
1013 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1014     }
1015     xmlCharEncodingAliasesNb = 0;
1016     xmlCharEncodingAliasesMax = 0;
1017     xmlFree(xmlCharEncodingAliases);
1018     xmlCharEncodingAliases = NULL;
1019 }
1020 
1021 /**
1022  * xmlGetEncodingAlias:
1023  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1024  *
1025  * Lookup an encoding name for the given alias.
1026  *
1027  * Returns NULL if not found, otherwise the original name
1028  */
1029 const char *
xmlGetEncodingAlias(const char * alias)1030 xmlGetEncodingAlias(const char *alias) {
1031     int i;
1032     char upper[100];
1033 
1034     if (alias == NULL)
1035 	return(NULL);
1036 
1037     if (xmlCharEncodingAliases == NULL)
1038 	return(NULL);
1039 
1040     for (i = 0;i < 99;i++) {
1041         upper[i] = toupper(alias[i]);
1042 	if (upper[i] == 0) break;
1043     }
1044     upper[i] = 0;
1045 
1046     /*
1047      * Walk down the list looking for a definition of the alias
1048      */
1049     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051 	    return(xmlCharEncodingAliases[i].name);
1052 	}
1053     }
1054     return(NULL);
1055 }
1056 
1057 /**
1058  * xmlAddEncodingAlias:
1059  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1060  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1061  *
1062  * Registers an alias @alias for an encoding named @name. Existing alias
1063  * will be overwritten.
1064  *
1065  * Returns 0 in case of success, -1 in case of error
1066  */
1067 int
xmlAddEncodingAlias(const char * name,const char * alias)1068 xmlAddEncodingAlias(const char *name, const char *alias) {
1069     int i;
1070     char upper[100];
1071 
1072     if ((name == NULL) || (alias == NULL))
1073 	return(-1);
1074 
1075     for (i = 0;i < 99;i++) {
1076         upper[i] = toupper(alias[i]);
1077 	if (upper[i] == 0) break;
1078     }
1079     upper[i] = 0;
1080 
1081     if (xmlCharEncodingAliases == NULL) {
1082 	xmlCharEncodingAliasesNb = 0;
1083 	xmlCharEncodingAliasesMax = 20;
1084 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086 	if (xmlCharEncodingAliases == NULL)
1087 	    return(-1);
1088     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089 	xmlCharEncodingAliasesMax *= 2;
1090 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091 	      xmlRealloc(xmlCharEncodingAliases,
1092 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1093     }
1094     /*
1095      * Walk down the list looking for a definition of the alias
1096      */
1097     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1099 	    /*
1100 	     * Replace the definition.
1101 	     */
1102 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1103 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104 	    return(0);
1105 	}
1106     }
1107     /*
1108      * Add the definition
1109      */
1110     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112     xmlCharEncodingAliasesNb++;
1113     return(0);
1114 }
1115 
1116 /**
1117  * xmlDelEncodingAlias:
1118  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1119  *
1120  * Unregisters an encoding alias @alias
1121  *
1122  * Returns 0 in case of success, -1 in case of error
1123  */
1124 int
xmlDelEncodingAlias(const char * alias)1125 xmlDelEncodingAlias(const char *alias) {
1126     int i;
1127 
1128     if (alias == NULL)
1129 	return(-1);
1130 
1131     if (xmlCharEncodingAliases == NULL)
1132 	return(-1);
1133     /*
1134      * Walk down the list looking for a definition of the alias
1135      */
1136     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1139 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140 	    xmlCharEncodingAliasesNb--;
1141 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143 	    return(0);
1144 	}
1145     }
1146     return(-1);
1147 }
1148 
1149 /**
1150  * xmlParseCharEncoding:
1151  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1152  *
1153  * Compare the string to the encoding schemes already known. Note
1154  * that the comparison is case insensitive accordingly to the section
1155  * [XML] 4.3.3 Character Encoding in Entities.
1156  *
1157  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158  * if not recognized.
1159  */
1160 xmlCharEncoding
xmlParseCharEncoding(const char * name)1161 xmlParseCharEncoding(const char* name)
1162 {
1163     const char *alias;
1164     char upper[500];
1165     int i;
1166 
1167     if (name == NULL)
1168 	return(XML_CHAR_ENCODING_NONE);
1169 
1170     /*
1171      * Do the alias resolution
1172      */
1173     alias = xmlGetEncodingAlias(name);
1174     if (alias != NULL)
1175 	name = alias;
1176 
1177     for (i = 0;i < 499;i++) {
1178         upper[i] = toupper(name[i]);
1179 	if (upper[i] == 0) break;
1180     }
1181     upper[i] = 0;
1182 
1183     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1186 
1187     /*
1188      * NOTE: if we were able to parse this, the endianness of UTF16 is
1189      *       already found and in use
1190      */
1191     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1193 
1194     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1197 
1198     /*
1199      * NOTE: if we were able to parse this, the endianness of UCS4 is
1200      *       already found and in use
1201      */
1202     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1205 
1206 
1207     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1210 
1211     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1214 
1215     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1222 
1223     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1226 
1227 #ifdef DEBUG_ENCODING
1228     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229 #endif
1230     return(XML_CHAR_ENCODING_ERROR);
1231 }
1232 
1233 /**
1234  * xmlGetCharEncodingName:
1235  * @enc:  the encoding
1236  *
1237  * The "canonical" name for XML encoding.
1238  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1239  * Section 4.3.3  Character Encoding in Entities
1240  *
1241  * Returns the canonical name for the given encoding
1242  */
1243 
1244 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1245 xmlGetCharEncodingName(xmlCharEncoding enc) {
1246     switch (enc) {
1247         case XML_CHAR_ENCODING_ERROR:
1248 	    return(NULL);
1249         case XML_CHAR_ENCODING_NONE:
1250 	    return(NULL);
1251         case XML_CHAR_ENCODING_UTF8:
1252 	    return("UTF-8");
1253         case XML_CHAR_ENCODING_UTF16LE:
1254 	    return("UTF-16");
1255         case XML_CHAR_ENCODING_UTF16BE:
1256 	    return("UTF-16");
1257         case XML_CHAR_ENCODING_EBCDIC:
1258             return("EBCDIC");
1259         case XML_CHAR_ENCODING_UCS4LE:
1260             return("ISO-10646-UCS-4");
1261         case XML_CHAR_ENCODING_UCS4BE:
1262             return("ISO-10646-UCS-4");
1263         case XML_CHAR_ENCODING_UCS4_2143:
1264             return("ISO-10646-UCS-4");
1265         case XML_CHAR_ENCODING_UCS4_3412:
1266             return("ISO-10646-UCS-4");
1267         case XML_CHAR_ENCODING_UCS2:
1268             return("ISO-10646-UCS-2");
1269         case XML_CHAR_ENCODING_8859_1:
1270 	    return("ISO-8859-1");
1271         case XML_CHAR_ENCODING_8859_2:
1272 	    return("ISO-8859-2");
1273         case XML_CHAR_ENCODING_8859_3:
1274 	    return("ISO-8859-3");
1275         case XML_CHAR_ENCODING_8859_4:
1276 	    return("ISO-8859-4");
1277         case XML_CHAR_ENCODING_8859_5:
1278 	    return("ISO-8859-5");
1279         case XML_CHAR_ENCODING_8859_6:
1280 	    return("ISO-8859-6");
1281         case XML_CHAR_ENCODING_8859_7:
1282 	    return("ISO-8859-7");
1283         case XML_CHAR_ENCODING_8859_8:
1284 	    return("ISO-8859-8");
1285         case XML_CHAR_ENCODING_8859_9:
1286 	    return("ISO-8859-9");
1287         case XML_CHAR_ENCODING_2022_JP:
1288             return("ISO-2022-JP");
1289         case XML_CHAR_ENCODING_SHIFT_JIS:
1290             return("Shift-JIS");
1291         case XML_CHAR_ENCODING_EUC_JP:
1292             return("EUC-JP");
1293 	case XML_CHAR_ENCODING_ASCII:
1294 	    return(NULL);
1295     }
1296     return(NULL);
1297 }
1298 
1299 /************************************************************************
1300  *									*
1301  *			Char encoding handlers				*
1302  *									*
1303  ************************************************************************/
1304 
1305 
1306 /* the size should be growable, but it's not a big deal ... */
1307 #define MAX_ENCODING_HANDLERS 50
1308 static xmlCharEncodingHandlerPtr *handlers = NULL;
1309 static int nbCharEncodingHandler = 0;
1310 
1311 /*
1312  * The default is UTF-8 for XML, that's also the default used for the
1313  * parser internals, so the default encoding handler is NULL
1314  */
1315 
1316 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1317 
1318 /**
1319  * xmlNewCharEncodingHandler:
1320  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1321  * @input:  the xmlCharEncodingInputFunc to read that encoding
1322  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1323  *
1324  * Create and registers an xmlCharEncodingHandler.
1325  *
1326  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1327  */
1328 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1329 xmlNewCharEncodingHandler(const char *name,
1330                           xmlCharEncodingInputFunc input,
1331                           xmlCharEncodingOutputFunc output) {
1332     xmlCharEncodingHandlerPtr handler;
1333     const char *alias;
1334     char upper[500];
1335     int i;
1336     char *up = NULL;
1337 
1338     /*
1339      * Do the alias resolution
1340      */
1341     alias = xmlGetEncodingAlias(name);
1342     if (alias != NULL)
1343 	name = alias;
1344 
1345     /*
1346      * Keep only the uppercase version of the encoding.
1347      */
1348     if (name == NULL) {
1349         xmlEncodingErr(XML_I18N_NO_NAME,
1350 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1351 	return(NULL);
1352     }
1353     for (i = 0;i < 499;i++) {
1354         upper[i] = toupper(name[i]);
1355 	if (upper[i] == 0) break;
1356     }
1357     upper[i] = 0;
1358     up = xmlMemStrdup(upper);
1359     if (up == NULL) {
1360         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 	return(NULL);
1362     }
1363 
1364     /*
1365      * allocate and fill-up an handler block.
1366      */
1367     handler = (xmlCharEncodingHandlerPtr)
1368               xmlMalloc(sizeof(xmlCharEncodingHandler));
1369     if (handler == NULL) {
1370         xmlFree(up);
1371         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372 	return(NULL);
1373     }
1374     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375     handler->input = input;
1376     handler->output = output;
1377     handler->name = up;
1378 
1379 #ifdef LIBXML_ICONV_ENABLED
1380     handler->iconv_in = NULL;
1381     handler->iconv_out = NULL;
1382 #endif
1383 #ifdef LIBXML_ICU_ENABLED
1384     handler->uconv_in = NULL;
1385     handler->uconv_out = NULL;
1386 #endif
1387 
1388     /*
1389      * registers and returns the handler.
1390      */
1391     xmlRegisterCharEncodingHandler(handler);
1392 #ifdef DEBUG_ENCODING
1393     xmlGenericError(xmlGenericErrorContext,
1394 	    "Registered encoding handler for %s\n", name);
1395 #endif
1396     return(handler);
1397 }
1398 
1399 /**
1400  * xmlInitCharEncodingHandlers:
1401  *
1402  * Initialize the char encoding support, it registers the default
1403  * encoding supported.
1404  * NOTE: while public, this function usually doesn't need to be called
1405  *       in normal processing.
1406  */
1407 void
xmlInitCharEncodingHandlers(void)1408 xmlInitCharEncodingHandlers(void) {
1409     unsigned short int tst = 0x1234;
1410     unsigned char *ptr = (unsigned char *) &tst;
1411 
1412     if (handlers != NULL) return;
1413 
1414     handlers = (xmlCharEncodingHandlerPtr *)
1415         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1416 
1417     if (*ptr == 0x12) xmlLittleEndian = 0;
1418     else if (*ptr == 0x34) xmlLittleEndian = 1;
1419     else {
1420         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421 	               "Odd problem at endianness detection\n", NULL);
1422     }
1423 
1424     if (handlers == NULL) {
1425         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426 	return;
1427     }
1428     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429 #ifdef LIBXML_OUTPUT_ENABLED
1430     xmlUTF16LEHandler =
1431           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432     xmlUTF16BEHandler =
1433           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438 #ifdef LIBXML_HTML_ENABLED
1439     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440 #endif
1441 #else
1442     xmlUTF16LEHandler =
1443           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444     xmlUTF16BEHandler =
1445           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450 #endif /* LIBXML_OUTPUT_ENABLED */
1451 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452 #ifdef LIBXML_ISO8859X_ENABLED
1453     xmlRegisterCharEncodingHandlersISO8859x ();
1454 #endif
1455 #endif
1456 
1457 }
1458 
1459 /**
1460  * xmlCleanupCharEncodingHandlers:
1461  *
1462  * Cleanup the memory allocated for the char encoding support, it
1463  * unregisters all the encoding handlers and the aliases.
1464  */
1465 void
xmlCleanupCharEncodingHandlers(void)1466 xmlCleanupCharEncodingHandlers(void) {
1467     xmlCleanupEncodingAliases();
1468 
1469     if (handlers == NULL) return;
1470 
1471     for (;nbCharEncodingHandler > 0;) {
1472         nbCharEncodingHandler--;
1473 	if (handlers[nbCharEncodingHandler] != NULL) {
1474 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1475 		xmlFree(handlers[nbCharEncodingHandler]->name);
1476 	    xmlFree(handlers[nbCharEncodingHandler]);
1477 	}
1478     }
1479     xmlFree(handlers);
1480     handlers = NULL;
1481     nbCharEncodingHandler = 0;
1482     xmlDefaultCharEncodingHandler = NULL;
1483 }
1484 
1485 /**
1486  * xmlRegisterCharEncodingHandler:
1487  * @handler:  the xmlCharEncodingHandlerPtr handler block
1488  *
1489  * Register the char encoding handler, surprising, isn't it ?
1490  */
1491 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1492 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493     if (handlers == NULL) xmlInitCharEncodingHandlers();
1494     if ((handler == NULL) || (handlers == NULL)) {
1495         xmlEncodingErr(XML_I18N_NO_HANDLER,
1496 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497         goto free_handler;
1498     }
1499 
1500     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503 	               "MAX_ENCODING_HANDLERS");
1504         goto free_handler;
1505     }
1506     handlers[nbCharEncodingHandler++] = handler;
1507     return;
1508 
1509 free_handler:
1510     if (handler != NULL) {
1511         if (handler->name != NULL) {
1512             xmlFree(handler->name);
1513         }
1514         xmlFree(handler);
1515     }
1516 }
1517 
1518 /**
1519  * xmlGetCharEncodingHandler:
1520  * @enc:  an xmlCharEncoding value.
1521  *
1522  * Search in the registered set the handler able to read/write that encoding.
1523  *
1524  * Returns the handler or NULL if not found
1525  */
1526 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1527 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528     xmlCharEncodingHandlerPtr handler;
1529 
1530     if (handlers == NULL) xmlInitCharEncodingHandlers();
1531     switch (enc) {
1532         case XML_CHAR_ENCODING_ERROR:
1533 	    return(NULL);
1534         case XML_CHAR_ENCODING_NONE:
1535 	    return(NULL);
1536         case XML_CHAR_ENCODING_UTF8:
1537 	    return(NULL);
1538         case XML_CHAR_ENCODING_UTF16LE:
1539 	    return(xmlUTF16LEHandler);
1540         case XML_CHAR_ENCODING_UTF16BE:
1541 	    return(xmlUTF16BEHandler);
1542         case XML_CHAR_ENCODING_EBCDIC:
1543             handler = xmlFindCharEncodingHandler("EBCDIC");
1544             if (handler != NULL) return(handler);
1545             handler = xmlFindCharEncodingHandler("ebcdic");
1546             if (handler != NULL) return(handler);
1547             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548             if (handler != NULL) return(handler);
1549             handler = xmlFindCharEncodingHandler("IBM-037");
1550             if (handler != NULL) return(handler);
1551 	    break;
1552         case XML_CHAR_ENCODING_UCS4BE:
1553             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554             if (handler != NULL) return(handler);
1555             handler = xmlFindCharEncodingHandler("UCS-4");
1556             if (handler != NULL) return(handler);
1557             handler = xmlFindCharEncodingHandler("UCS4");
1558             if (handler != NULL) return(handler);
1559 	    break;
1560         case XML_CHAR_ENCODING_UCS4LE:
1561             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562             if (handler != NULL) return(handler);
1563             handler = xmlFindCharEncodingHandler("UCS-4");
1564             if (handler != NULL) return(handler);
1565             handler = xmlFindCharEncodingHandler("UCS4");
1566             if (handler != NULL) return(handler);
1567 	    break;
1568         case XML_CHAR_ENCODING_UCS4_2143:
1569 	    break;
1570         case XML_CHAR_ENCODING_UCS4_3412:
1571 	    break;
1572         case XML_CHAR_ENCODING_UCS2:
1573             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574             if (handler != NULL) return(handler);
1575             handler = xmlFindCharEncodingHandler("UCS-2");
1576             if (handler != NULL) return(handler);
1577             handler = xmlFindCharEncodingHandler("UCS2");
1578             if (handler != NULL) return(handler);
1579 	    break;
1580 
1581 	    /*
1582 	     * We used to keep ISO Latin encodings native in the
1583 	     * generated data. This led to so many problems that
1584 	     * this has been removed. One can still change this
1585 	     * back by registering no-ops encoders for those
1586 	     */
1587         case XML_CHAR_ENCODING_8859_1:
1588 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589 	    if (handler != NULL) return(handler);
1590 	    break;
1591         case XML_CHAR_ENCODING_8859_2:
1592 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593 	    if (handler != NULL) return(handler);
1594 	    break;
1595         case XML_CHAR_ENCODING_8859_3:
1596 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597 	    if (handler != NULL) return(handler);
1598 	    break;
1599         case XML_CHAR_ENCODING_8859_4:
1600 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601 	    if (handler != NULL) return(handler);
1602 	    break;
1603         case XML_CHAR_ENCODING_8859_5:
1604 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605 	    if (handler != NULL) return(handler);
1606 	    break;
1607         case XML_CHAR_ENCODING_8859_6:
1608 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609 	    if (handler != NULL) return(handler);
1610 	    break;
1611         case XML_CHAR_ENCODING_8859_7:
1612 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613 	    if (handler != NULL) return(handler);
1614 	    break;
1615         case XML_CHAR_ENCODING_8859_8:
1616 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617 	    if (handler != NULL) return(handler);
1618 	    break;
1619         case XML_CHAR_ENCODING_8859_9:
1620 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621 	    if (handler != NULL) return(handler);
1622 	    break;
1623 
1624 
1625         case XML_CHAR_ENCODING_2022_JP:
1626             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627             if (handler != NULL) return(handler);
1628 	    break;
1629         case XML_CHAR_ENCODING_SHIFT_JIS:
1630             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631             if (handler != NULL) return(handler);
1632             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633             if (handler != NULL) return(handler);
1634             handler = xmlFindCharEncodingHandler("Shift_JIS");
1635             if (handler != NULL) return(handler);
1636 	    break;
1637         case XML_CHAR_ENCODING_EUC_JP:
1638             handler = xmlFindCharEncodingHandler("EUC-JP");
1639             if (handler != NULL) return(handler);
1640 	    break;
1641 	default:
1642 	    break;
1643     }
1644 
1645 #ifdef DEBUG_ENCODING
1646     xmlGenericError(xmlGenericErrorContext,
1647 	    "No handler found for encoding %d\n", enc);
1648 #endif
1649     return(NULL);
1650 }
1651 
1652 /**
1653  * xmlFindCharEncodingHandler:
1654  * @name:  a string describing the char encoding.
1655  *
1656  * Search in the registered set the handler able to read/write that encoding.
1657  *
1658  * Returns the handler or NULL if not found
1659  */
1660 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1661 xmlFindCharEncodingHandler(const char *name) {
1662     const char *nalias;
1663     const char *norig;
1664     xmlCharEncoding alias;
1665 #ifdef LIBXML_ICONV_ENABLED
1666     xmlCharEncodingHandlerPtr enc;
1667     iconv_t icv_in, icv_out;
1668 #endif /* LIBXML_ICONV_ENABLED */
1669 #ifdef LIBXML_ICU_ENABLED
1670     xmlCharEncodingHandlerPtr encu;
1671     uconv_t *ucv_in, *ucv_out;
1672 #endif /* LIBXML_ICU_ENABLED */
1673     char upper[100];
1674     int i;
1675 
1676     if (handlers == NULL) xmlInitCharEncodingHandlers();
1677     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1679 
1680     /*
1681      * Do the alias resolution
1682      */
1683     norig = name;
1684     nalias = xmlGetEncodingAlias(name);
1685     if (nalias != NULL)
1686 	name = nalias;
1687 
1688     /*
1689      * Check first for directly registered encoding names
1690      */
1691     for (i = 0;i < 99;i++) {
1692         upper[i] = toupper(name[i]);
1693 	if (upper[i] == 0) break;
1694     }
1695     upper[i] = 0;
1696 
1697     if (handlers != NULL) {
1698         for (i = 0;i < nbCharEncodingHandler; i++) {
1699             if (!strcmp(upper, handlers[i]->name)) {
1700 #ifdef DEBUG_ENCODING
1701                 xmlGenericError(xmlGenericErrorContext,
1702                         "Found registered handler for encoding %s\n", name);
1703 #endif
1704                 return(handlers[i]);
1705             }
1706         }
1707     }
1708 
1709 #ifdef LIBXML_ICONV_ENABLED
1710     /* check whether iconv can handle this */
1711     icv_in = iconv_open("UTF-8", name);
1712     icv_out = iconv_open(name, "UTF-8");
1713     if (icv_in == (iconv_t) -1) {
1714         icv_in = iconv_open("UTF-8", upper);
1715     }
1716     if (icv_out == (iconv_t) -1) {
1717 	icv_out = iconv_open(upper, "UTF-8");
1718     }
1719     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720 	    enc = (xmlCharEncodingHandlerPtr)
1721 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1722 	    if (enc == NULL) {
1723 	        iconv_close(icv_in);
1724 	        iconv_close(icv_out);
1725 		return(NULL);
1726 	    }
1727             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728 	    enc->name = xmlMemStrdup(name);
1729 	    enc->input = NULL;
1730 	    enc->output = NULL;
1731 	    enc->iconv_in = icv_in;
1732 	    enc->iconv_out = icv_out;
1733 #ifdef DEBUG_ENCODING
1734             xmlGenericError(xmlGenericErrorContext,
1735 		    "Found iconv handler for encoding %s\n", name);
1736 #endif
1737 	    return enc;
1738     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1739 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740 		    "iconv : problems with filters for '%s'\n", name);
1741     }
1742 #endif /* LIBXML_ICONV_ENABLED */
1743 #ifdef LIBXML_ICU_ENABLED
1744     /* check whether icu can handle this */
1745     ucv_in = openIcuConverter(name, 1);
1746     ucv_out = openIcuConverter(name, 0);
1747     if (ucv_in != NULL && ucv_out != NULL) {
1748 	    encu = (xmlCharEncodingHandlerPtr)
1749 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1750 	    if (encu == NULL) {
1751                 closeIcuConverter(ucv_in);
1752                 closeIcuConverter(ucv_out);
1753 		return(NULL);
1754 	    }
1755             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1756 	    encu->name = xmlMemStrdup(name);
1757 	    encu->input = NULL;
1758 	    encu->output = NULL;
1759 	    encu->uconv_in = ucv_in;
1760 	    encu->uconv_out = ucv_out;
1761 #ifdef DEBUG_ENCODING
1762             xmlGenericError(xmlGenericErrorContext,
1763 		    "Found ICU converter handler for encoding %s\n", name);
1764 #endif
1765 	    return encu;
1766     } else if (ucv_in != NULL || ucv_out != NULL) {
1767             closeIcuConverter(ucv_in);
1768             closeIcuConverter(ucv_out);
1769 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1770 		    "ICU converter : problems with filters for '%s'\n", name);
1771     }
1772 #endif /* LIBXML_ICU_ENABLED */
1773 
1774 #ifdef DEBUG_ENCODING
1775     xmlGenericError(xmlGenericErrorContext,
1776 	    "No handler found for encoding %s\n", name);
1777 #endif
1778 
1779     /*
1780      * Fallback using the canonical names
1781      */
1782     alias = xmlParseCharEncoding(norig);
1783     if (alias != XML_CHAR_ENCODING_ERROR) {
1784         const char* canon;
1785         canon = xmlGetCharEncodingName(alias);
1786         if ((canon != NULL) && (strcmp(name, canon))) {
1787 	    return(xmlFindCharEncodingHandler(canon));
1788         }
1789     }
1790 
1791     /* If "none of the above", give up */
1792     return(NULL);
1793 }
1794 
1795 /************************************************************************
1796  *									*
1797  *		ICONV based generic conversion functions		*
1798  *									*
1799  ************************************************************************/
1800 
1801 #ifdef LIBXML_ICONV_ENABLED
1802 /**
1803  * xmlIconvWrapper:
1804  * @cd:		iconv converter data structure
1805  * @out:  a pointer to an array of bytes to store the result
1806  * @outlen:  the length of @out
1807  * @in:  a pointer to an array of input bytes
1808  * @inlen:  the length of @in
1809  *
1810  * Returns 0 if success, or
1811  *     -1 by lack of space, or
1812  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1813  *        the result of transformation can't fit into the encoding we want), or
1814  *     -3 if there the last byte can't form a single output char.
1815  *
1816  * The value of @inlen after return is the number of octets consumed
1817  *     as the return value is positive, else unpredictable.
1818  * The value of @outlen after return is the number of octets produced.
1819  */
1820 static int
xmlIconvWrapper(iconv_t cd,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1821 xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
1822                 const unsigned char *in, int *inlen) {
1823     size_t icv_inlen, icv_outlen;
1824     const char *icv_in = (const char *) in;
1825     char *icv_out = (char *) out;
1826     size_t ret;
1827 
1828     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1829         if (outlen != NULL) *outlen = 0;
1830         return(-1);
1831     }
1832     icv_inlen = *inlen;
1833     icv_outlen = *outlen;
1834     ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
1835     *inlen -= icv_inlen;
1836     *outlen -= icv_outlen;
1837     if ((icv_inlen != 0) || (ret == (size_t) -1)) {
1838 #ifdef EILSEQ
1839         if (errno == EILSEQ) {
1840             return -2;
1841         } else
1842 #endif
1843 #ifdef E2BIG
1844         if (errno == E2BIG) {
1845             return -1;
1846         } else
1847 #endif
1848 #ifdef EINVAL
1849         if (errno == EINVAL) {
1850             return -3;
1851         } else
1852 #endif
1853         {
1854             return -3;
1855         }
1856     }
1857     return 0;
1858 }
1859 #endif /* LIBXML_ICONV_ENABLED */
1860 
1861 /************************************************************************
1862  *									*
1863  *		ICU based generic conversion functions		*
1864  *									*
1865  ************************************************************************/
1866 
1867 #ifdef LIBXML_ICU_ENABLED
1868 /**
1869  * xmlUconvWrapper:
1870  * @cd: ICU uconverter data structure
1871  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1872  * @out:  a pointer to an array of bytes to store the result
1873  * @outlen:  the length of @out
1874  * @in:  a pointer to an array of input bytes
1875  * @inlen:  the length of @in
1876  * @flush: if true, indicates end of input
1877  *
1878  * Returns 0 if success, or
1879  *     -1 by lack of space, or
1880  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1881  *        the result of transformation can't fit into the encoding we want), or
1882  *     -3 if there the last byte can't form a single output char.
1883  *
1884  * The value of @inlen after return is the number of octets consumed
1885  *     as the return value is positive, else unpredictable.
1886  * The value of @outlen after return is the number of octets produced.
1887  */
1888 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1889 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1890                 const unsigned char *in, int *inlen, int flush) {
1891     const char *ucv_in = (const char *) in;
1892     char *ucv_out = (char *) out;
1893     UErrorCode err = U_ZERO_ERROR;
1894 
1895     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1896         if (outlen != NULL) *outlen = 0;
1897         return(-1);
1898     }
1899 
1900     if (toUnicode) {
1901         /* encoding => UTF-16 => UTF-8 */
1902         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1903                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1904                        &cd->pivot_source, &cd->pivot_target,
1905                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1906     } else {
1907         /* UTF-8 => UTF-16 => encoding */
1908         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1909                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1910                        &cd->pivot_source, &cd->pivot_target,
1911                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1912     }
1913     *inlen = ucv_in - (const char*) in;
1914     *outlen = ucv_out - (char *) out;
1915     if (U_SUCCESS(err)) {
1916         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1917         if (flush)
1918             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1919         return 0;
1920     }
1921     if (err == U_BUFFER_OVERFLOW_ERROR)
1922         return -1;
1923     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1924         return -2;
1925     return -3;
1926 }
1927 #endif /* LIBXML_ICU_ENABLED */
1928 
1929 /************************************************************************
1930  *									*
1931  *		The real API used by libxml for on-the-fly conversion	*
1932  *									*
1933  ************************************************************************/
1934 
1935 /**
1936  * xmlEncInputChunk:
1937  * @handler:  encoding handler
1938  * @out:  a pointer to an array of bytes to store the result
1939  * @outlen:  the length of @out
1940  * @in:  a pointer to an array of input bytes
1941  * @inlen:  the length of @in
1942  * @flush:  flush (ICU-related)
1943  *
1944  * Returns 0 if success, or
1945  *     -1 by lack of space, or
1946  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1947  *        the result of transformation can't fit into the encoding we want), or
1948  *     -3 if there the last byte can't form a single output char.
1949  *
1950  * The value of @inlen after return is the number of octets consumed
1951  *     as the return value is 0, else unpredictable.
1952  * The value of @outlen after return is the number of octets produced.
1953  */
1954 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1955 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1956                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1957     int ret;
1958     (void)flush;
1959 
1960     if (handler->input != NULL) {
1961         ret = handler->input(out, outlen, in, inlen);
1962         if (ret > 0)
1963            ret = 0;
1964     }
1965 #ifdef LIBXML_ICONV_ENABLED
1966     else if (handler->iconv_in != NULL) {
1967         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1968     }
1969 #endif /* LIBXML_ICONV_ENABLED */
1970 #ifdef LIBXML_ICU_ENABLED
1971     else if (handler->uconv_in != NULL) {
1972         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1973                               flush);
1974     }
1975 #endif /* LIBXML_ICU_ENABLED */
1976     else {
1977         *outlen = 0;
1978         *inlen = 0;
1979         ret = -2;
1980     }
1981 
1982     return(ret);
1983 }
1984 
1985 /**
1986  * xmlEncOutputChunk:
1987  * @handler:  encoding handler
1988  * @out:  a pointer to an array of bytes to store the result
1989  * @outlen:  the length of @out
1990  * @in:  a pointer to an array of input bytes
1991  * @inlen:  the length of @in
1992  *
1993  * Returns 0 if success, or
1994  *     -1 by lack of space, or
1995  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1996  *        the result of transformation can't fit into the encoding we want), or
1997  *     -3 if there the last byte can't form a single output char.
1998  *     -4 if no output function was found.
1999  *
2000  * The value of @inlen after return is the number of octets consumed
2001  *     as the return value is 0, else unpredictable.
2002  * The value of @outlen after return is the number of octets produced.
2003  */
2004 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2005 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2006                   int *outlen, const unsigned char *in, int *inlen) {
2007     int ret;
2008 
2009     if (handler->output != NULL) {
2010         ret = handler->output(out, outlen, in, inlen);
2011         if (ret > 0)
2012            ret = 0;
2013     }
2014 #ifdef LIBXML_ICONV_ENABLED
2015     else if (handler->iconv_out != NULL) {
2016         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2017     }
2018 #endif /* LIBXML_ICONV_ENABLED */
2019 #ifdef LIBXML_ICU_ENABLED
2020     else if (handler->uconv_out != NULL) {
2021         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2022                               1);
2023     }
2024 #endif /* LIBXML_ICU_ENABLED */
2025     else {
2026         *outlen = 0;
2027         *inlen = 0;
2028         ret = -4;
2029     }
2030 
2031     return(ret);
2032 }
2033 
2034 /**
2035  * xmlCharEncFirstLineInt:
2036  * @handler:	char encoding transformation data structure
2037  * @out:  an xmlBuffer for the output.
2038  * @in:  an xmlBuffer for the input
2039  * @len:  number of bytes to convert for the first line, or -1
2040  *
2041  * Front-end for the encoding handler input function, but handle only
2042  * the very first line, i.e. limit itself to 45 chars.
2043  *
2044  * Returns the number of byte written if success, or
2045  *     -1 general error
2046  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2047  *        the result of transformation can't fit into the encoding we want), or
2048  */
2049 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2050 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2051                        xmlBufferPtr in, int len) {
2052     int ret;
2053     int written;
2054     int toconv;
2055 
2056     if (handler == NULL) return(-1);
2057     if (out == NULL) return(-1);
2058     if (in == NULL) return(-1);
2059 
2060     /* calculate space available */
2061     written = out->size - out->use - 1; /* count '\0' */
2062     toconv = in->use;
2063     /*
2064      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2065      * 45 chars should be sufficient to reach the end of the encoding
2066      * declaration without going too far inside the document content.
2067      * on UTF-16 this means 90bytes, on UCS4 this means 180
2068      * The actual value depending on guessed encoding is passed as @len
2069      * if provided
2070      */
2071     if (len >= 0) {
2072         if (toconv > len)
2073             toconv = len;
2074     } else {
2075         if (toconv > 180)
2076             toconv = 180;
2077     }
2078     if (toconv * 2 >= written) {
2079         xmlBufferGrow(out, toconv * 2);
2080 	written = out->size - out->use - 1;
2081     }
2082 
2083     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2084                            in->content, &toconv, 0);
2085     xmlBufferShrink(in, toconv);
2086     out->use += written;
2087     out->content[out->use] = 0;
2088     if (ret == -1) ret = -3;
2089 
2090 #ifdef DEBUG_ENCODING
2091     switch (ret) {
2092         case 0:
2093 	    xmlGenericError(xmlGenericErrorContext,
2094 		    "converted %d bytes to %d bytes of input\n",
2095 	            toconv, written);
2096 	    break;
2097         case -1:
2098 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2099 	            toconv, written, in->use);
2100 	    break;
2101         case -2:
2102 	    xmlGenericError(xmlGenericErrorContext,
2103 		    "input conversion failed due to input error\n");
2104 	    break;
2105         case -3:
2106 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2107 	            toconv, written, in->use);
2108 	    break;
2109 	default:
2110 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2111     }
2112 #endif /* DEBUG_ENCODING */
2113     /*
2114      * Ignore when input buffer is not on a boundary
2115      */
2116     if (ret == -3) ret = 0;
2117     if (ret == -1) ret = 0;
2118     return(written ? written : ret);
2119 }
2120 
2121 /**
2122  * xmlCharEncFirstLine:
2123  * @handler:	char encoding transformation data structure
2124  * @out:  an xmlBuffer for the output.
2125  * @in:  an xmlBuffer for the input
2126  *
2127  * Front-end for the encoding handler input function, but handle only
2128  * the very first line, i.e. limit itself to 45 chars.
2129  *
2130  * Returns the number of byte written if success, or
2131  *     -1 general error
2132  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2133  *        the result of transformation can't fit into the encoding we want), or
2134  */
2135 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2136 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2137                  xmlBufferPtr in) {
2138     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2139 }
2140 
2141 /**
2142  * xmlCharEncFirstLineInput:
2143  * @input: a parser input buffer
2144  * @len:  number of bytes to convert for the first line, or -1
2145  *
2146  * Front-end for the encoding handler input function, but handle only
2147  * the very first line. Point is that this is based on autodetection
2148  * of the encoding and once that first line is converted we may find
2149  * out that a different decoder is needed to process the input.
2150  *
2151  * Returns the number of byte written if success, or
2152  *     -1 general error
2153  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2154  *        the result of transformation can't fit into the encoding we want), or
2155  */
2156 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2157 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2158 {
2159     int ret;
2160     size_t written;
2161     size_t toconv;
2162     int c_in;
2163     int c_out;
2164     xmlBufPtr in;
2165     xmlBufPtr out;
2166 
2167     if ((input == NULL) || (input->encoder == NULL) ||
2168         (input->buffer == NULL) || (input->raw == NULL))
2169         return (-1);
2170     out = input->buffer;
2171     in = input->raw;
2172 
2173     toconv = xmlBufUse(in);
2174     if (toconv == 0)
2175         return (0);
2176     written = xmlBufAvail(out) - 1; /* count '\0' */
2177     /*
2178      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2179      * 45 chars should be sufficient to reach the end of the encoding
2180      * declaration without going too far inside the document content.
2181      * on UTF-16 this means 90bytes, on UCS4 this means 180
2182      * The actual value depending on guessed encoding is passed as @len
2183      * if provided
2184      */
2185     if (len >= 0) {
2186         if (toconv > (unsigned int) len)
2187             toconv = len;
2188     } else {
2189         if (toconv > 180)
2190             toconv = 180;
2191     }
2192     if (toconv * 2 >= written) {
2193         xmlBufGrow(out, toconv * 2);
2194         written = xmlBufAvail(out) - 1;
2195     }
2196     if (written > 360)
2197         written = 360;
2198 
2199     c_in = toconv;
2200     c_out = written;
2201     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2202                            xmlBufContent(in), &c_in, 0);
2203     xmlBufShrink(in, c_in);
2204     xmlBufAddLen(out, c_out);
2205     if (ret == -1)
2206         ret = -3;
2207 
2208     switch (ret) {
2209         case 0:
2210 #ifdef DEBUG_ENCODING
2211             xmlGenericError(xmlGenericErrorContext,
2212                             "converted %d bytes to %d bytes of input\n",
2213                             c_in, c_out);
2214 #endif
2215             break;
2216         case -1:
2217 #ifdef DEBUG_ENCODING
2218             xmlGenericError(xmlGenericErrorContext,
2219                          "converted %d bytes to %d bytes of input, %d left\n",
2220                             c_in, c_out, (int)xmlBufUse(in));
2221 #endif
2222             break;
2223         case -3:
2224 #ifdef DEBUG_ENCODING
2225             xmlGenericError(xmlGenericErrorContext,
2226                         "converted %d bytes to %d bytes of input, %d left\n",
2227                             c_in, c_out, (int)xmlBufUse(in));
2228 #endif
2229             break;
2230         case -2: {
2231             char buf[50];
2232             const xmlChar *content = xmlBufContent(in);
2233 
2234 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2235 		     content[0], content[1],
2236 		     content[2], content[3]);
2237 	    buf[49] = 0;
2238 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2239 		    "input conversion failed due to input error, bytes %s\n",
2240 		           buf);
2241         }
2242     }
2243     /*
2244      * Ignore when input buffer is not on a boundary
2245      */
2246     if (ret == -3) ret = 0;
2247     if (ret == -1) ret = 0;
2248     return(c_out ? c_out : ret);
2249 }
2250 
2251 /**
2252  * xmlCharEncInput:
2253  * @input: a parser input buffer
2254  * @flush: try to flush all the raw buffer
2255  *
2256  * Generic front-end for the encoding handler on parser input
2257  *
2258  * Returns the number of byte written if success, or
2259  *     -1 general error
2260  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2261  *        the result of transformation can't fit into the encoding we want), or
2262  */
2263 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2264 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2265 {
2266     int ret;
2267     size_t written;
2268     size_t toconv;
2269     int c_in;
2270     int c_out;
2271     xmlBufPtr in;
2272     xmlBufPtr out;
2273 
2274     if ((input == NULL) || (input->encoder == NULL) ||
2275         (input->buffer == NULL) || (input->raw == NULL))
2276         return (-1);
2277     out = input->buffer;
2278     in = input->raw;
2279 
2280     toconv = xmlBufUse(in);
2281     if (toconv == 0)
2282         return (0);
2283     if ((toconv > 64 * 1024) && (flush == 0))
2284         toconv = 64 * 1024;
2285     written = xmlBufAvail(out);
2286     if (written > 0)
2287         written--; /* count '\0' */
2288     if (toconv * 2 >= written) {
2289         xmlBufGrow(out, toconv * 2);
2290         written = xmlBufAvail(out);
2291         if (written > 0)
2292             written--; /* count '\0' */
2293     }
2294     if ((written > 128 * 1024) && (flush == 0))
2295         written = 128 * 1024;
2296 
2297     c_in = toconv;
2298     c_out = written;
2299     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2300                            xmlBufContent(in), &c_in, flush);
2301     xmlBufShrink(in, c_in);
2302     xmlBufAddLen(out, c_out);
2303     if (ret == -1)
2304         ret = -3;
2305 
2306     switch (ret) {
2307         case 0:
2308 #ifdef DEBUG_ENCODING
2309             xmlGenericError(xmlGenericErrorContext,
2310                             "converted %d bytes to %d bytes of input\n",
2311                             c_in, c_out);
2312 #endif
2313             break;
2314         case -1:
2315 #ifdef DEBUG_ENCODING
2316             xmlGenericError(xmlGenericErrorContext,
2317                          "converted %d bytes to %d bytes of input, %d left\n",
2318                             c_in, c_out, (int)xmlBufUse(in));
2319 #endif
2320             break;
2321         case -3:
2322 #ifdef DEBUG_ENCODING
2323             xmlGenericError(xmlGenericErrorContext,
2324                         "converted %d bytes to %d bytes of input, %d left\n",
2325                             c_in, c_out, (int)xmlBufUse(in));
2326 #endif
2327             break;
2328         case -2: {
2329             char buf[50];
2330             const xmlChar *content = xmlBufContent(in);
2331 
2332 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2333 		     content[0], content[1],
2334 		     content[2], content[3]);
2335 	    buf[49] = 0;
2336 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2337 		    "input conversion failed due to input error, bytes %s\n",
2338 		           buf);
2339         }
2340     }
2341     /*
2342      * Ignore when input buffer is not on a boundary
2343      */
2344     if (ret == -3)
2345         ret = 0;
2346     return (c_out? c_out : ret);
2347 }
2348 
2349 /**
2350  * xmlCharEncInFunc:
2351  * @handler:	char encoding transformation data structure
2352  * @out:  an xmlBuffer for the output.
2353  * @in:  an xmlBuffer for the input
2354  *
2355  * Generic front-end for the encoding handler input function
2356  *
2357  * Returns the number of byte written if success, or
2358  *     -1 general error
2359  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2360  *        the result of transformation can't fit into the encoding we want), or
2361  */
2362 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2363 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2364                  xmlBufferPtr in)
2365 {
2366     int ret;
2367     int written;
2368     int toconv;
2369 
2370     if (handler == NULL)
2371         return (-1);
2372     if (out == NULL)
2373         return (-1);
2374     if (in == NULL)
2375         return (-1);
2376 
2377     toconv = in->use;
2378     if (toconv == 0)
2379         return (0);
2380     written = out->size - out->use -1; /* count '\0' */
2381     if (toconv * 2 >= written) {
2382         xmlBufferGrow(out, out->size + toconv * 2);
2383         written = out->size - out->use - 1;
2384     }
2385     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2386                            in->content, &toconv, 1);
2387     xmlBufferShrink(in, toconv);
2388     out->use += written;
2389     out->content[out->use] = 0;
2390     if (ret == -1)
2391         ret = -3;
2392 
2393     switch (ret) {
2394         case 0:
2395 #ifdef DEBUG_ENCODING
2396             xmlGenericError(xmlGenericErrorContext,
2397                             "converted %d bytes to %d bytes of input\n",
2398                             toconv, written);
2399 #endif
2400             break;
2401         case -1:
2402 #ifdef DEBUG_ENCODING
2403             xmlGenericError(xmlGenericErrorContext,
2404                          "converted %d bytes to %d bytes of input, %d left\n",
2405                             toconv, written, in->use);
2406 #endif
2407             break;
2408         case -3:
2409 #ifdef DEBUG_ENCODING
2410             xmlGenericError(xmlGenericErrorContext,
2411                         "converted %d bytes to %d bytes of input, %d left\n",
2412                             toconv, written, in->use);
2413 #endif
2414             break;
2415         case -2: {
2416             char buf[50];
2417 
2418 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2419 		     in->content[0], in->content[1],
2420 		     in->content[2], in->content[3]);
2421 	    buf[49] = 0;
2422 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2423 		    "input conversion failed due to input error, bytes %s\n",
2424 		           buf);
2425         }
2426     }
2427     /*
2428      * Ignore when input buffer is not on a boundary
2429      */
2430     if (ret == -3)
2431         ret = 0;
2432     return (written? written : ret);
2433 }
2434 
2435 #ifdef LIBXML_OUTPUT_ENABLED
2436 /**
2437  * xmlCharEncOutput:
2438  * @output: a parser output buffer
2439  * @init: is this an initialization call without data
2440  *
2441  * Generic front-end for the encoding handler on parser output
2442  * a first call with @init == 1 has to be made first to initiate the
2443  * output in case of non-stateless encoding needing to initiate their
2444  * state or the output (like the BOM in UTF16).
2445  * In case of UTF8 sequence conversion errors for the given encoder,
2446  * the content will be automatically remapped to a CharRef sequence.
2447  *
2448  * Returns the number of byte written if success, or
2449  *     -1 general error
2450  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2451  *        the result of transformation can't fit into the encoding we want), or
2452  */
2453 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2454 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2455 {
2456     int ret;
2457     size_t written;
2458     int writtentot = 0;
2459     size_t toconv;
2460     int c_in;
2461     int c_out;
2462     xmlBufPtr in;
2463     xmlBufPtr out;
2464 
2465     if ((output == NULL) || (output->encoder == NULL) ||
2466         (output->buffer == NULL) || (output->conv == NULL))
2467         return (-1);
2468     out = output->conv;
2469     in = output->buffer;
2470 
2471 retry:
2472 
2473     written = xmlBufAvail(out);
2474     if (written > 0)
2475         written--; /* count '\0' */
2476 
2477     /*
2478      * First specific handling of the initialization call
2479      */
2480     if (init) {
2481         c_in = 0;
2482         c_out = written;
2483         /* TODO: Check return value. */
2484         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2485                           NULL, &c_in);
2486         xmlBufAddLen(out, c_out);
2487 #ifdef DEBUG_ENCODING
2488 	xmlGenericError(xmlGenericErrorContext,
2489 		"initialized encoder\n");
2490 #endif
2491         return(c_out);
2492     }
2493 
2494     /*
2495      * Conversion itself.
2496      */
2497     toconv = xmlBufUse(in);
2498     if (toconv == 0)
2499         return (writtentot);
2500     if (toconv > 64 * 1024)
2501         toconv = 64 * 1024;
2502     if (toconv * 4 >= written) {
2503         xmlBufGrow(out, toconv * 4);
2504         written = xmlBufAvail(out) - 1;
2505     }
2506     if (written > 256 * 1024)
2507         written = 256 * 1024;
2508 
2509     c_in = toconv;
2510     c_out = written;
2511     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2512                             xmlBufContent(in), &c_in);
2513     xmlBufShrink(in, c_in);
2514     xmlBufAddLen(out, c_out);
2515     writtentot += c_out;
2516     if (ret == -1) {
2517         if (c_out > 0) {
2518             /* Can be a limitation of iconv or uconv */
2519             goto retry;
2520         }
2521         ret = -3;
2522     }
2523 
2524     /*
2525      * Attempt to handle error cases
2526      */
2527     switch (ret) {
2528         case 0:
2529 #ifdef DEBUG_ENCODING
2530 	    xmlGenericError(xmlGenericErrorContext,
2531 		    "converted %d bytes to %d bytes of output\n",
2532 	            c_in, c_out);
2533 #endif
2534 	    break;
2535         case -1:
2536 #ifdef DEBUG_ENCODING
2537 	    xmlGenericError(xmlGenericErrorContext,
2538 		    "output conversion failed by lack of space\n");
2539 #endif
2540 	    break;
2541         case -3:
2542 #ifdef DEBUG_ENCODING
2543 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2544 	            c_in, c_out, (int) xmlBufUse(in));
2545 #endif
2546 	    break;
2547         case -4:
2548             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2549                            "xmlCharEncOutFunc: no output function !\n", NULL);
2550             ret = -1;
2551             break;
2552         case -2: {
2553 	    xmlChar charref[20];
2554 	    int len = (int) xmlBufUse(in);
2555             xmlChar *content = xmlBufContent(in);
2556 	    int cur, charrefLen;
2557 
2558 	    cur = xmlGetUTF8Char(content, &len);
2559 	    if (cur <= 0)
2560                 break;
2561 
2562 #ifdef DEBUG_ENCODING
2563             xmlGenericError(xmlGenericErrorContext,
2564                     "handling output conversion error\n");
2565             xmlGenericError(xmlGenericErrorContext,
2566                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2567                     content[0], content[1],
2568                     content[2], content[3]);
2569 #endif
2570             /*
2571              * Removes the UTF8 sequence, and replace it by a charref
2572              * and continue the transcoding phase, hoping the error
2573              * did not mangle the encoder state.
2574              */
2575             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2576                              "&#%d;", cur);
2577             xmlBufShrink(in, len);
2578             xmlBufGrow(out, charrefLen * 4);
2579             c_out = xmlBufAvail(out) - 1;
2580             c_in = charrefLen;
2581             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2582                                     charref, &c_in);
2583 
2584 	    if ((ret < 0) || (c_in != charrefLen)) {
2585 		char buf[50];
2586 
2587 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2588 			 content[0], content[1],
2589 			 content[2], content[3]);
2590 		buf[49] = 0;
2591 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2592 		    "output conversion failed due to conv error, bytes %s\n",
2593 			       buf);
2594 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2595 		    content[0] = ' ';
2596                 break;
2597 	    }
2598 
2599             xmlBufAddLen(out, c_out);
2600             writtentot += c_out;
2601             goto retry;
2602 	}
2603     }
2604     return(writtentot ? writtentot : ret);
2605 }
2606 #endif
2607 
2608 /**
2609  * xmlCharEncOutFunc:
2610  * @handler:	char encoding transformation data structure
2611  * @out:  an xmlBuffer for the output.
2612  * @in:  an xmlBuffer for the input
2613  *
2614  * Generic front-end for the encoding handler output function
2615  * a first call with @in == NULL has to be made firs to initiate the
2616  * output in case of non-stateless encoding needing to initiate their
2617  * state or the output (like the BOM in UTF16).
2618  * In case of UTF8 sequence conversion errors for the given encoder,
2619  * the content will be automatically remapped to a CharRef sequence.
2620  *
2621  * Returns the number of byte written if success, or
2622  *     -1 general error
2623  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2624  *        the result of transformation can't fit into the encoding we want), or
2625  */
2626 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2627 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2628                   xmlBufferPtr in) {
2629     int ret;
2630     int written;
2631     int writtentot = 0;
2632     int toconv;
2633 
2634     if (handler == NULL) return(-1);
2635     if (out == NULL) return(-1);
2636 
2637 retry:
2638 
2639     written = out->size - out->use;
2640 
2641     if (written > 0)
2642 	written--; /* Gennady: count '/0' */
2643 
2644     /*
2645      * First specific handling of in = NULL, i.e. the initialization call
2646      */
2647     if (in == NULL) {
2648         toconv = 0;
2649         /* TODO: Check return value. */
2650         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2651                           NULL, &toconv);
2652         out->use += written;
2653         out->content[out->use] = 0;
2654 #ifdef DEBUG_ENCODING
2655 	xmlGenericError(xmlGenericErrorContext,
2656 		"initialized encoder\n");
2657 #endif
2658         return(0);
2659     }
2660 
2661     /*
2662      * Conversion itself.
2663      */
2664     toconv = in->use;
2665     if (toconv == 0)
2666 	return(0);
2667     if (toconv * 4 >= written) {
2668         xmlBufferGrow(out, toconv * 4);
2669 	written = out->size - out->use - 1;
2670     }
2671     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2672                             in->content, &toconv);
2673     xmlBufferShrink(in, toconv);
2674     out->use += written;
2675     writtentot += written;
2676     out->content[out->use] = 0;
2677     if (ret == -1) {
2678         if (written > 0) {
2679             /* Can be a limitation of iconv or uconv */
2680             goto retry;
2681         }
2682         ret = -3;
2683     }
2684 
2685     /*
2686      * Attempt to handle error cases
2687      */
2688     switch (ret) {
2689         case 0:
2690 #ifdef DEBUG_ENCODING
2691 	    xmlGenericError(xmlGenericErrorContext,
2692 		    "converted %d bytes to %d bytes of output\n",
2693 	            toconv, written);
2694 #endif
2695 	    break;
2696         case -1:
2697 #ifdef DEBUG_ENCODING
2698 	    xmlGenericError(xmlGenericErrorContext,
2699 		    "output conversion failed by lack of space\n");
2700 #endif
2701 	    break;
2702         case -3:
2703 #ifdef DEBUG_ENCODING
2704 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2705 	            toconv, written, in->use);
2706 #endif
2707 	    break;
2708         case -4:
2709 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2710 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2711 	    ret = -1;
2712             break;
2713         case -2: {
2714 	    xmlChar charref[20];
2715 	    int len = in->use;
2716 	    const xmlChar *utf = (const xmlChar *) in->content;
2717 	    int cur, charrefLen;
2718 
2719 	    cur = xmlGetUTF8Char(utf, &len);
2720 	    if (cur <= 0)
2721                 break;
2722 
2723 #ifdef DEBUG_ENCODING
2724             xmlGenericError(xmlGenericErrorContext,
2725                     "handling output conversion error\n");
2726             xmlGenericError(xmlGenericErrorContext,
2727                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2728                     in->content[0], in->content[1],
2729                     in->content[2], in->content[3]);
2730 #endif
2731             /*
2732              * Removes the UTF8 sequence, and replace it by a charref
2733              * and continue the transcoding phase, hoping the error
2734              * did not mangle the encoder state.
2735              */
2736             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2737                              "&#%d;", cur);
2738             xmlBufferShrink(in, len);
2739             xmlBufferGrow(out, charrefLen * 4);
2740 	    written = out->size - out->use - 1;
2741             toconv = charrefLen;
2742             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2743                                     charref, &toconv);
2744 
2745 	    if ((ret < 0) || (toconv != charrefLen)) {
2746 		char buf[50];
2747 
2748 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2749 			 in->content[0], in->content[1],
2750 			 in->content[2], in->content[3]);
2751 		buf[49] = 0;
2752 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2753 		    "output conversion failed due to conv error, bytes %s\n",
2754 			       buf);
2755 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2756 		    in->content[0] = ' ';
2757 	        break;
2758 	    }
2759 
2760             out->use += written;
2761             writtentot += written;
2762             out->content[out->use] = 0;
2763             goto retry;
2764 	}
2765     }
2766     return(writtentot ? writtentot : ret);
2767 }
2768 
2769 /**
2770  * xmlCharEncCloseFunc:
2771  * @handler:	char encoding transformation data structure
2772  *
2773  * Generic front-end for encoding handler close function
2774  *
2775  * Returns 0 if success, or -1 in case of error
2776  */
2777 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2778 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2779     int ret = 0;
2780     int tofree = 0;
2781     int i, handler_in_list = 0;
2782 
2783     if (handler == NULL) return(-1);
2784     if (handler->name == NULL) return(-1);
2785     if (handlers != NULL) {
2786         for (i = 0;i < nbCharEncodingHandler; i++) {
2787             if (handler == handlers[i]) {
2788 	        handler_in_list = 1;
2789 		break;
2790 	    }
2791 	}
2792     }
2793 #ifdef LIBXML_ICONV_ENABLED
2794     /*
2795      * Iconv handlers can be used only once, free the whole block.
2796      * and the associated icon resources.
2797      */
2798     if ((handler_in_list == 0) &&
2799         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2800         tofree = 1;
2801 	if (handler->iconv_out != NULL) {
2802 	    if (iconv_close(handler->iconv_out))
2803 		ret = -1;
2804 	    handler->iconv_out = NULL;
2805 	}
2806 	if (handler->iconv_in != NULL) {
2807 	    if (iconv_close(handler->iconv_in))
2808 		ret = -1;
2809 	    handler->iconv_in = NULL;
2810 	}
2811     }
2812 #endif /* LIBXML_ICONV_ENABLED */
2813 #ifdef LIBXML_ICU_ENABLED
2814     if ((handler_in_list == 0) &&
2815         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2816         tofree = 1;
2817 	if (handler->uconv_out != NULL) {
2818 	    closeIcuConverter(handler->uconv_out);
2819 	    handler->uconv_out = NULL;
2820 	}
2821 	if (handler->uconv_in != NULL) {
2822 	    closeIcuConverter(handler->uconv_in);
2823 	    handler->uconv_in = NULL;
2824 	}
2825     }
2826 #endif
2827     if (tofree) {
2828         /* free up only dynamic handlers iconv/uconv */
2829         if (handler->name != NULL)
2830             xmlFree(handler->name);
2831         handler->name = NULL;
2832         xmlFree(handler);
2833     }
2834 #ifdef DEBUG_ENCODING
2835     if (ret)
2836         xmlGenericError(xmlGenericErrorContext,
2837 		"failed to close the encoding handler\n");
2838     else
2839         xmlGenericError(xmlGenericErrorContext,
2840 		"closed the encoding handler\n");
2841 #endif
2842 
2843     return(ret);
2844 }
2845 
2846 /**
2847  * xmlByteConsumed:
2848  * @ctxt: an XML parser context
2849  *
2850  * This function provides the current index of the parser relative
2851  * to the start of the current entity. This function is computed in
2852  * bytes from the beginning starting at zero and finishing at the
2853  * size in byte of the file if parsing a file. The function is
2854  * of constant cost if the input is UTF-8 but can be costly if run
2855  * on non-UTF-8 input.
2856  *
2857  * Returns the index in bytes from the beginning of the entity or -1
2858  *         in case the index could not be computed.
2859  */
2860 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2861 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2862     xmlParserInputPtr in;
2863 
2864     if (ctxt == NULL) return(-1);
2865     in = ctxt->input;
2866     if (in == NULL)  return(-1);
2867     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2868         unsigned int unused = 0;
2869 	xmlCharEncodingHandler * handler = in->buf->encoder;
2870         /*
2871 	 * Encoding conversion, compute the number of unused original
2872 	 * bytes from the input not consumed and subtract that from
2873 	 * the raw consumed value, this is not a cheap operation
2874 	 */
2875         if (in->end - in->cur > 0) {
2876 	    unsigned char convbuf[32000];
2877 	    const unsigned char *cur = (const unsigned char *)in->cur;
2878 	    int toconv = in->end - in->cur, written = 32000;
2879 
2880 	    int ret;
2881 
2882             do {
2883                 toconv = in->end - cur;
2884                 written = 32000;
2885                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2886                                         cur, &toconv);
2887                 if (ret < 0) {
2888                     if (written > 0)
2889                         ret = -2;
2890                     else
2891                         return(-1);
2892                 }
2893                 unused += written;
2894                 cur += toconv;
2895             } while (ret == -2);
2896 	}
2897 	if (in->buf->rawconsumed < unused)
2898 	    return(-1);
2899 	return(in->buf->rawconsumed - unused);
2900     }
2901     return(in->consumed + (in->cur - in->base));
2902 }
2903 
2904 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2905 #ifdef LIBXML_ISO8859X_ENABLED
2906 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every complete UTF-8 sequence yields exactly one
 * output byte, and a sequence only counts as consumed once its byte has
 * been stored. No byte is ever written beyond @out + @outlen.
 *
 * Layout of @xlattable as read here: entries 0..31 select a 64-entry
 * block for the lead byte of a two-byte sequence, entries 32..47 select
 * a first-level block for the lead byte of a three-byte sequence, and
 * the 64-entry blocks start at offset 48. A looked-up value of 0 means
 * that the character is not part of the target character set.
 *
 * Returns the number of bytes written if the whole input was converted,
 *     -1 if an argument is invalid or if @out has no room left for the
 *     next character, -2 if the input is not valid UTF-8 or holds a
 *     character which can't be transcoded, -3 if the input stops in the
 *     middle of a sequence. If @in is NULL both lengths are set to 0
 *     and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are left untouched when an argument is invalid.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
	return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /*
             * Output buffer exhausted. Reported as -1 so that callers
             * can provide more room and resume from @inlen.
             */
            ret = -1;
            break;
        }
        d = *in++;
        if (d < 0x80) {
            /* ASCII is copied as is */
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            d = xlattable [48 + (c & 0x3F) + xlattable [d & 0x1F] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            d = xlattable [48 + (c2 & 0x3F) + xlattable [48 + (c1 & 0x3F) +
                        xlattable [32 + (d & 0x0F)] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        *out++ = d;
        processed = in;
    }
    /* on error only the fully converted sequences are reported */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3025 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: code points for the bytes 0x80..0xFF, indexed by
 *                (byte - 0x80), 0 meaning that the byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied unchanged, the other
 * ones are looked up in @unicodetable and written as a two or three
 * byte UTF-8 sequence. A byte >= 0x80 is only converted while at least
 * three bytes are free in @out; conversion stops early, without error,
 * when @out is too small.
 *
 * Returns the number of bytes written to @out, or -1 if an argument is
 *     NULL or if an undefined code point is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 * Both are left untouched when an argument is NULL.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
	return(-1);
    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * Compare lengths instead of computing outend - 2, which is not a
     * valid pointer when the output buffer holds less than 2 bytes.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /*
         * Copy the following run of ASCII bytes. The bounds are checked
         * before *in is read so that the byte at inend is never touched.
         */
        while ((in < inend) && (out < outend) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * Less than 3 bytes are free at this point (or the input is
     * exhausted): ASCII still fits, anything else has to wait.
     */
    while ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3091 
3092 
3093 /************************************************************************
3094  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3095  ************************************************************************/
3096 
/*
 * ISO-8859-2 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF. The shape matches the @unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-2 handler (not visible here).
 */
3097 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3098     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3099     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3100     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3101     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3102     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3103     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3104     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3105     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3106     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3107     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3108     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3109     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3110     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3111     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3112     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3113     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3114 };
3115 
/*
 * Two-level UTF-8 to ISO-8859-2 table, sized as 48 selector bytes plus
 * 6 blocks of 64 entries. This is the layout UTF8ToISO8859x() reads from
 * its @xlattable argument (selectors 0..31 for two-byte lead bytes,
 * 32..47 for three-byte lead bytes, blocks from offset 48 on, 0x00 for
 * "not in the character set"); presumably that function is the consumer,
 * the call site is not visible here.
 */
3116 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3117     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3118     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3119     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3120     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3121     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3122     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3125     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3126     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3127     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3128     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3129     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3130     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3132     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3133     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3134     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3137     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3138     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3139     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3140     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3141     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3142     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3143     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3144 };
3145 
/*
 * ISO-8859-3 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF; 0x0000 entries are bytes without a mapping (ISO8859xToUTF8()
 * fails with -1 on a 0 entry). The shape matches that function's
 * @unicodetable argument, indexed with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-3 handler (not visible here).
 */
3146 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3147     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3148     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3149     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3150     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3151     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3152     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3153     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3154     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3155     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3156     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3157     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3158     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3159     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3160     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3161     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3162     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3163 };
3164 
/*
 * Two-level UTF-8 to ISO-8859-3 table, sized as 48 selector bytes plus
 * 7 blocks of 64 entries. This is the layout UTF8ToISO8859x() reads from
 * its @xlattable argument (selectors 0..31 for two-byte lead bytes,
 * 32..47 for three-byte lead bytes, blocks from offset 48 on, 0x00 for
 * "not in the character set"); presumably that function is the consumer,
 * the call site is not visible here.
 */
3165 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3166     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3167     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3169     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3170     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3171     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3173     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3174     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3175     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3176     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3177     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3178     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3179     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3180     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3183     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3191     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3192     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3193     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3194     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3195     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3196     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3197 };
3198 
/*
 * ISO-8859-4 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF. The shape matches the @unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-4 handler (not visible here).
 */
3199 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3200     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3201     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3202     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3203     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3204     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3205     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3206     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3207     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3208     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3209     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3210     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3211     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3212     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3213     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3214     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3215     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3216 };
3217 
/*
 * Two-level UTF-8 to ISO-8859-4 table, sized as 48 selector bytes plus
 * 6 blocks of 64 entries. This is the layout UTF8ToISO8859x() reads from
 * its @xlattable argument (selectors 0..31 for two-byte lead bytes,
 * 32..47 for three-byte lead bytes, blocks from offset 48 on, 0x00 for
 * "not in the character set"); presumably that function is the consumer,
 * the call site is not visible here.
 */
3218 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3219     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3220     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3222     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3223     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3224     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3227     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3228     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3229     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3230     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3231     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3232     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3233     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3234     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3235     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3236     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3237     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3238     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3239     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3240     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3243     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3244     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3245     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3246 };
3247 
/*
 * ISO-8859-5 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF. The shape matches the @unicodetable argument of
 * ISO8859xToUTF8(), which indexes it with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-5 handler (not visible here).
 */
3248 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3249     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3250     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3251     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3252     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3253     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3254     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3255     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3256     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3257     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3258     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3259     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3260     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3261     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3262     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3263     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3264     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3265 };
3266 
/*
 * Two-level UTF-8 to ISO-8859-5 table, sized as 48 selector bytes plus
 * 6 blocks of 64 entries. This is the layout UTF8ToISO8859x() reads from
 * its @xlattable argument (selectors 0..31 for two-byte lead bytes,
 * 32..47 for three-byte lead bytes, blocks from offset 48 on, 0x00 for
 * "not in the character set"); presumably that function is the consumer,
 * the call site is not visible here.
 */
3267 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3268     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3269     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3276     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3277     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3278     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3280     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3281     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3282     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3283     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3284     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3285     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 };
3296 
/*
 * ISO-8859-6 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF; 0x0000 entries are bytes without a mapping (ISO8859xToUTF8()
 * fails with -1 on a 0 entry). The shape matches that function's
 * @unicodetable argument, indexed with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-6 handler (not visible here).
 */
3297 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3298     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3299     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3300     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3301     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3302     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3303     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3304     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3305     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3306     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3307     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3308     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3309     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3310     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3311     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3312     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3313     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3314 };
3315 
/*
 * Two-level UTF-8 to ISO-8859-6 table, sized as 48 selector bytes plus
 * 5 blocks of 64 entries. This is the layout UTF8ToISO8859x() reads from
 * its @xlattable argument (selectors 0..31 for two-byte lead bytes,
 * 32..47 for three-byte lead bytes, blocks from offset 48 on, 0x00 for
 * "not in the character set"); presumably that function is the consumer,
 * the call site is not visible here.
 */
3316 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3317     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3319     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3321     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3325     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3326     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3327     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3329     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3330     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3331     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3333     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3334     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3335     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3336     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3337     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 };
3341 
/*
 * ISO-8859-7 upper half to Unicode: 128 code points, one per byte value
 * 0x80..0xFF; 0x0000 entries are bytes without a mapping (ISO8859xToUTF8()
 * fails with -1 on a 0 entry). The shape matches that function's
 * @unicodetable argument, indexed with (byte - 0x80); presumably this
 * table is handed to it by the ISO-8859-7 handler (not visible here).
 */
3342 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3343     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3344     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3345     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3346     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3347     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3348     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3349     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3350     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3351     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3352     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3353     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3354     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3355     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3356     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3357     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3358     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3359 };
3360 
/*
 * xmltranscodetable_ISO8859_7: 48 + 7 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-7 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3361 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3362     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3363     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3370     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3371     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3372     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3373     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3374     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3377     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3379     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3386     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3387     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3388     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3389     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3390     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3391     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3392     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3393 };
3394 
/*
 * xmlunicodetable_ISO8859_8: 128 16-bit entries for ISO-8859-8.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); 0x0000 presumably
 * flags a byte value with no mapping.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3395 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3396     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3397     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3398     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3399     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3400     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3401     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3402     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3403     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3404     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3405     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3406     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3407     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3408     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3409     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3410     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3411     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3412 };
3413 
/*
 * xmltranscodetable_ISO8859_8: 48 + 7 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-8 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3414 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3415     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3417     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3419     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3423     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3424     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3425     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3426     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3427     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3428     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3429     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3430     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3432     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3434     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3439     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3440     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3444     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3445     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 };
3447 
/*
 * xmlunicodetable_ISO8859_9: 128 16-bit entries for ISO-8859-9.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3448 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3449     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3450     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3451     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3452     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3453     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3454     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3455     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3456     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3457     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3458     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3459     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3460     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3461     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3462     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3463     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3464     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3465 };
3466 
/*
 * xmltranscodetable_ISO8859_9: 48 + 5 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-9 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3467 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3468     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3469     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3476     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3477     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3478     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3479     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3480     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3481     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3482     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3483     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3485     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3489     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491 };
3492 
/*
 * xmlunicodetable_ISO8859_10: 128 16-bit entries for ISO-8859-10.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3493 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3494     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3495     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3496     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3497     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3498     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3499     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3500     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3501     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3502     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3503     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3504     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3505     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3506     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3507     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3508     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3509     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3510 };
3511 
/*
 * xmltranscodetable_ISO8859_10: 48 + 7 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-10 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3512 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3513     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3514     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3515     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3516     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3517     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3521     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3522     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3523     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3524     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3525     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3526     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3527     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3528     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3529     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3530     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3531     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3532     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3533     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3541     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3542     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3543     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3544 };
3545 
/*
 * xmlunicodetable_ISO8859_11: 128 16-bit entries for ISO-8859-11.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); 0x0000 presumably
 * flags a byte value with no mapping.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3546 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3547     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3548     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3549     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3550     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3551     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3552     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3553     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3554     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3555     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3556     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3557     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3558     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3559     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3560     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3561     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3562     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3563 };
3564 
/*
 * xmltranscodetable_ISO8859_11: 48 + 6 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-11 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3565 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3566     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3569     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3574     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3575     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3578     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3579     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3581     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3582     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3583     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3584     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3585     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3586     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3587     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3588     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3589     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3590     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3591     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593 };
3594 
/*
 * xmlunicodetable_ISO8859_13: 128 16-bit entries for ISO-8859-13.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3595 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3596     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3597     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3598     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3599     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3600     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3601     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3602     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3603     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3604     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3605     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3606     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3607     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3608     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3609     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3610     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3611     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3612 };
3613 
/*
 * xmltranscodetable_ISO8859_13: 48 + 7 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-13 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3614 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3615     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3617     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3622     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3623     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3624     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3625     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3626     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3627     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3628     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3629     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3630     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3631     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3632     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3633     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3634     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3635     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3636     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3637     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3638     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3639     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3640     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3641     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3642     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3643     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3644     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3645     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3646 };
3647 
/*
 * xmlunicodetable_ISO8859_14: 128 16-bit entries for ISO-8859-14.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3648 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3649     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3650     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3651     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3652     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3653     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3654     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3655     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3656     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3657     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3658     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3659     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3660     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3661     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3662     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3663     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3664     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3665 };
3666 
/*
 * xmltranscodetable_ISO8859_14: 48 + 10 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-14 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3667 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3668     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3674     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3676     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3677     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3678     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3681     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3683     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3684     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3685     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3686     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3687     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3688     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3689     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3691     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3692     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3693     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3694     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3695     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3696     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3697     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3698     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3699     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3700     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3701     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3702     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3703     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3704     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3705     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3706     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3707     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3708     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3709     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3710     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3711 };
3712 
/*
 * xmlunicodetable_ISO8859_15: 128 16-bit entries for ISO-8859-15.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3713 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3714     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3715     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3716     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3717     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3718     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3719     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3720     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3721     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3722     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3723     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3724     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3725     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3726     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3727     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3728     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3729     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3730 };
3731 
/*
 * xmltranscodetable_ISO8859_15: 48 + 6 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-15 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3732 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3733     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3734     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3735     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3736     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3737     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3738     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3739     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3740     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3741     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3742     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3743     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3744     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3745     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3746     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3747     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3748     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3752     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3753     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3754     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3755     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3756     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3757     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3758     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3759     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3760 };
3761 
/*
 * xmlunicodetable_ISO8859_16: 128 16-bit entries for ISO-8859-16.
 * From the data, entry i looks like the Unicode code point for byte
 * 0x80 + i (entry 0 is 0x0080, entry 32 is 0x00a0); every entry in this
 * table is non-zero.
 * NOTE(review): presumably consumed by ISO8859xToUTF8 in the same way as
 * the ISO-8859-2..4 tables further down in this file -- confirm.
 */
3762 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3763     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3764     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3765     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3766     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3767     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3768     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3769     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3770     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3771     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3772     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3773     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3774     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3775     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3776     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3777     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3778     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3779 };
3780 
/*
 * xmltranscodetable_ISO8859_16: 48 + 9 * 64 bytes of lookup data, written
 * as string literals that fill the array exactly (so no terminating NUL
 * is stored, which C permits).
 * NOTE(review): presumably the UTF-8 -> ISO-8859-16 table handed to
 * UTF8ToISO8859x, as is done for ISO-8859-2..4 further down in this file;
 * the indexing scheme is defined by that function, not here.
 */
3781 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3782     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3783     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3784     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3785     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3786     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3787     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3788     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3789     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3790     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3791     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3792     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3793     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3794     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3795     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3796     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3797     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3798     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3799     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3800     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3801     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3802     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3803     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3804     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3805     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3806     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3807     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3808     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3809     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3810     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3811     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3812     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3813     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3814     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3815     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3816     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3817     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3818     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3819     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3820     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3821 };
3822 
3823 
3824 /*
3825  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3826  */
3827 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3828 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3829     const unsigned char* in, int *inlen) {
3830     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3831 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3832 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3833     const unsigned char* in, int *inlen) {
3834     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3835 }
3836 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3837 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3838     const unsigned char* in, int *inlen) {
3839     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3840 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3841 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3842     const unsigned char* in, int *inlen) {
3843     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3844 }
3845 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3846 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3847     const unsigned char* in, int *inlen) {
3848     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3849 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3850 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3851     const unsigned char* in, int *inlen) {
3852     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3853 }
3854 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3855 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3856     const unsigned char* in, int *inlen) {
3857     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3858 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3859 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3860     const unsigned char* in, int *inlen) {
3861     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3862 }
3863 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3864 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3865     const unsigned char* in, int *inlen) {
3866     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3867 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3868 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3869     const unsigned char* in, int *inlen) {
3870     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3871 }
3872 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3873 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3874     const unsigned char* in, int *inlen) {
3875     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3876 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3877 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3878     const unsigned char* in, int *inlen) {
3879     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3880 }
3881 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3882 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3883     const unsigned char* in, int *inlen) {
3884     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3885 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3886 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3887     const unsigned char* in, int *inlen) {
3888     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3889 }
3890 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3891 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3892     const unsigned char* in, int *inlen) {
3893     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3894 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3895 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3896     const unsigned char* in, int *inlen) {
3897     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3898 }
3899 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3900 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3901     const unsigned char* in, int *inlen) {
3902     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3903 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3904 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3905     const unsigned char* in, int *inlen) {
3906     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3907 }
3908 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3909 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3910     const unsigned char* in, int *inlen) {
3911     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3912 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3913 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3914     const unsigned char* in, int *inlen) {
3915     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3916 }
3917 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3918 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3919     const unsigned char* in, int *inlen) {
3920     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3921 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3922 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3923     const unsigned char* in, int *inlen) {
3924     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3925 }
3926 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3927 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3928     const unsigned char* in, int *inlen) {
3929     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3930 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3931 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3932     const unsigned char* in, int *inlen) {
3933     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3934 }
3935 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3936 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3937     const unsigned char* in, int *inlen) {
3938     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3939 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3940 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3941     const unsigned char* in, int *inlen) {
3942     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3943 }
3944 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3945 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3946     const unsigned char* in, int *inlen) {
3947     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3948 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3949 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3950     const unsigned char* in, int *inlen) {
3951     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3952 }
3953 
3954 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3955 xmlRegisterCharEncodingHandlersISO8859x (void) {
3956     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3957     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3958     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3959     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3960     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3961     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3962     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3963     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3964     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3965     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3966     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3967     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3968     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3969     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3970 }
3971 
3972 #endif
3973 #endif
3974 
3975 #define bottom_encoding
3976 #include "elfgcchack.h"
3977